fix: allocate SM3 accumulators on the gradient's device instead of the CPU

kozistr · kozistr · commit 169ae964f6c3 · 2023-04-22T20:34:18.000+09:00
diff --git a/pytorch_optimizer/optimizer/sm3.py b/pytorch_optimizer/optimizer/sm3.py
@@ -98,12 +98,14 @@ def step(self, closure: CLOSURE = None) -> LOSS:
                     state['momentum_buffer'] = torch.zeros_like(p)
 
                     if grad.is_sparse:
-                        state['accumulator_0'] = torch.zeros(shape[0])
+                        state['accumulator_0'] = torch.zeros(shape[0], device=grad.device)
                     elif rank == 0:
-                        state['accumulator_0'] = torch.zeros(shape)
+                        state['accumulator_0'] = torch.zeros_like(p)
                     else:
                         for i in range(rank):
-                            state[f'accumulator_{i}'] = torch.zeros([1] * i + [shape[i]] + [1] * (rank - 1 - i))
+                            state[f'accumulator_{i}'] = torch.zeros(
+                                [1] * i + [shape[i]] + [1] * (rank - 1 - i), device=grad.device
+                            )
 
                 state['step'] += 1