 import torch
 from torch.optim.optimizer import Optimizer

-from pytorch_optimizer.types import (
-    BETAS,
-    CLOSURE,
-    DEFAULT_PARAMETERS,
-    LOSS,
-    PARAMS,
-    STATE,
-)
+from pytorch_optimizer.types import BETAS, CLOSURE, DEFAULTS, LOSS, PARAMETERS, STATE


 class AdaBelief(Optimizer):
@@ -31,60 +24,47 @@ class AdaBelief(Optimizer):

     def __init__(
         self,
-        params: PARAMS,
+        params: PARAMETERS,
         lr: float = 1e-3,
         betas: BETAS = (0.9, 0.999),
-        eps: float = 1e-16,
         weight_decay: float = 0.0,
         n_sma_threshold: int = 5,
-        amsgrad: bool = False,
         weight_decouple: bool = True,
         fixed_decay: bool = False,
         rectify: bool = True,
         degenerated_to_sgd: bool = True,
+        amsgrad: bool = False,
+        eps: float = 1e-16,
     ):
-        """AdaBelief optimizer
-        :param params: PARAMS. iterable of parameters to optimize
-            or dicts defining parameter groups
+        """
+        :param params: PARAMETERS. iterable of parameters to optimize or dicts defining parameter groups
         :param lr: float. learning rate
-        :param betas: BETAS. coefficients used for computing running averages
-            of gradient and the squared hessian trace
-        :param eps: float. term added to the denominator
-            to improve numerical stability
+        :param betas: BETAS. coefficients used for computing running averages of gradient and the squared hessian trace
         :param weight_decay: float. weight decay (L2 penalty)
         :param n_sma_threshold: (recommended is 5)
-        :param amsgrad: bool. whether to use the AMSBound variant
-        :param weight_decouple: bool. the optimizer uses decoupled weight decay
-            as in AdamW
+        :param weight_decouple: bool. the optimizer uses decoupled weight decay as in AdamW
         :param fixed_decay: bool.
         :param rectify: bool. perform the rectified update similar to RAdam
-        :param degenerated_to_sgd: bool. perform SGD update
-            when variance of gradient is high
+        :param degenerated_to_sgd: bool. perform SGD update when variance of gradient is high
+        :param amsgrad: bool. whether to use the AMSBound variant
+        :param eps: float. term added to the denominator to improve numerical stability
         """
         self.lr = lr
         self.betas = betas
-        self.eps = eps
         self.weight_decay = weight_decay
         self.n_sma_threshold = n_sma_threshold
-        self.degenerated_to_sgd = degenerated_to_sgd
         self.weight_decouple = weight_decouple
-        self.rectify = rectify
         self.fixed_decay = fixed_decay
+        self.rectify = rectify
         self.degenerated_to_sgd = degenerated_to_sgd
+        self.eps = eps

-        if (
-            isinstance(params, (list, tuple))
-            and len(params) > 0
-            and isinstance(params[0], dict)
-        ):
+        if isinstance(params, (list, tuple)) and len(params) > 0 and isinstance(params[0], dict):
             for param in params:
-                if 'betas' in param and (
-                    param['betas'][0] != betas[0]
-                    or param['betas'][1] != betas[1]
-                ):
+                if 'betas' in param and (param['betas'][0] != betas[0] or param['betas'][1] != betas[1]):
                     param['buffer'] = [[None, None, None] for _ in range(10)]

-        defaults: DEFAULT_PARAMETERS = dict(
+        defaults: DEFAULTS = dict(
             lr=lr,
             betas=betas,
             eps=eps,
@@ -129,9 +109,7 @@ def step(self, closure: CLOSURE = None) -> LOSS:

                 grad = p.grad.data
                 if grad.is_sparse:
-                    raise RuntimeError(
-                        'AdaBelief does not support sparse gradients'
-                    )
+                    raise RuntimeError('AdaBelief does not support sparse gradients')

                 amsgrad = group['amsgrad']

@@ -163,9 +141,7 @@ def step(self, closure: CLOSURE = None) -> LOSS:

                 exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                 grad_residual = grad - exp_avg
-                exp_avg_var.mul_(beta2).addcmul_(
-                    grad_residual, grad_residual, value=1 - beta2
-                )
+                exp_avg_var.mul_(beta2).addcmul_(grad_residual, grad_residual, value=1 - beta2)

                 if amsgrad:
                     max_exp_avg_var = state['max_exp_avg_var']
@@ -176,14 +152,9 @@ def step(self, closure: CLOSURE = None) -> LOSS:
                         out=max_exp_avg_var,
                     )

-                    denom = (
-                        max_exp_avg_var.sqrt() / math.sqrt(bias_correction2)
-                    ).add_(group['eps'])
+                    denom = (max_exp_avg_var.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
                 else:
-                    denom = (
-                        exp_avg_var.add_(group['eps']).sqrt()
-                        / math.sqrt(bias_correction2)
-                    ).add_(group['eps'])
+                    denom = (exp_avg_var.add_(group['eps']).sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])

                 if not self.rectify:
                     step_size = group['lr'] / bias_correction1
@@ -196,9 +167,7 @@ def step(self, closure: CLOSURE = None) -> LOSS:
                         buffered[0] = state['step']
                         beta2_t = beta2 ** state['step']
                         n_sma_max = 2 / (1 - beta2) - 1
-                        n_sma = n_sma_max - 2 * state['step'] * beta2_t / (
-                            1 - beta2_t
-                        )
+                        n_sma = n_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                         buffered[1] = n_sma

                         if n_sma >= self.n_sma_threshold:
@@ -219,9 +188,7 @@ def step(self, closure: CLOSURE = None) -> LOSS:

                     if n_sma >= self.n_sma_threshold:
                         denom = exp_avg_var.sqrt().add_(group['eps'])
-                        p.data.addcdiv_(
-                            exp_avg, denom, value=-step_size * group['lr']
-                        )
+                        p.data.addcdiv_(exp_avg, denom, value=-step_size * group['lr'])
                     elif step_size > 0:
                         p.data.add_(exp_avg, alpha=-step_size * group['lr'])

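For reference, a minimal usage sketch of the constructor after this reordering. It is not part of the commit: the linear model and dummy batch are placeholders, and it assumes `AdaBelief` is re-exported from the package root of `pytorch_optimizer`.

```python
# Hypothetical usage sketch, not from this commit: exercises the new keyword
# order (amsgrad and eps moved to the end of __init__ above).
import torch

from pytorch_optimizer import AdaBelief  # assumes the package-root re-export

model = torch.nn.Linear(10, 1)  # placeholder model
optimizer = AdaBelief(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
    weight_decay=0.0,
    n_sma_threshold=5,
    weight_decouple=True,
    fixed_decay=False,
    rectify=True,
    degenerated_to_sgd=True,
    amsgrad=False,
    eps=1e-16,
)

x, y = torch.randn(8, 10), torch.randn(8, 1)  # dummy batch
loss = torch.nn.functional.mse_loss(model(x), y)
loss.backward()
optimizer.step()
optimizer.zero_grad()
```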