Skip to content

50系显卡支持? #61

@NaughtDZ

Description

@NaughtDZ

我使用5090显卡进行生成,提示
Separator will use CUDA device: cuda:0
L:\SongGeneration\installer_files\env\Lib\site-packages\torch\cuda\__init__.py:218: UserWarning:
NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 sm_90 compute_37.
If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
感受就是生成速度非常慢

此外还有个问题,上传参考音频生成,会报错ValueError: stat: path too long for Windows
具体控制台内容:
Traceback (most recent call last):
File "L:\SongGeneration\webui.py", line 521, in generate_music_from_ui
item = generate_full_process(item, cfg, ckpt_path, gen_type, max_duration, generation_params, progress)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\webui.py", line 240, in generate_full_process
pmt_wav, _ = audio_tokenizer.encode(pmt_wav.cuda())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\codeclm\tokenizer\audio_tokenizer.py", line 132, in encode
codes = self.model.sound2code(x) # [B T] -> [B N T]
^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\amp\autocast_mode.py", line 16, in decorate_autocast
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\codeclm\tokenizer\Flow1dVAE\generate_1rvq.py", line 122, in sound2code
codes, _, spk_embeds = self.model.fetch_codes_batch((audio_input[audio_inx:audio_inx+batch_size]), additional_feats=[],layer=self.layer_num)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\codeclm\tokenizer\Flow1dVAE\model_1rvq.py", line 575, in fetch_codes_batch
bestrq_emb = self.extract_bestrq_embeds(input_audio_0,input_audio_1,layer)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\codeclm\tokenizer\Flow1dVAE\model_1rvq.py", line 384, in extract_bestrq_embeds
input_wav_mean = self.bestrq(self.rsq48tobestrq(input_wav_mean), features_only = True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\nn\modules\module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\nn\modules\module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\codeclm\tokenizer\Flow1dVAE\our_MERT_BESTRQ\mert_fairseq\models\musicfm\musicfm_model.py", line 72, in forward
_, hidden_states = self.model.get_predictions(source)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\codeclm\tokenizer\Flow1dVAE\our_MERT_BESTRQ\mert_fairseq\models\musicfm\model\musicfm_25hz.py", line 226, in get_predictions
x = self.preprocessing(x, features=["melspec_2048"])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\codeclm\tokenizer\Flow1dVAE\our_MERT_BESTRQ\mert_fairseq\models\musicfm\model\musicfm_25hz.py", line 177, in preprocessing
out[key] = layer.float()(x.float())[..., :-1]
^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\nn\modules\module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\nn\modules\module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\codeclm\tokenizer\Flow1dVAE\our_MERT_BESTRQ\mert_fairseq\models\musicfm\modules\features.py", line 43, in forward
return self.amplitude_to_db(self.mel_stft(waveform))
^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\nn\modules\module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\nn\modules\module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torchaudio\transforms\_transforms.py", line 619, in forward
specgram = self.spectrogram(waveform)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\nn\modules\module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torch\nn\modules\module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torchaudio\transforms\_transforms.py", line 110, in forward
return F.spectrogram(
^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\torchaudio\functional\functional.py", line 147, in spectrogram
return spec_f.abs().pow(power)
^^^^^^^^^^^^
RuntimeError:
#ifdef __HIPCC__
#define ERROR_UNSUPPORTED_CAST ;
// corresponds to aten/src/ATen/native/cuda/thread_constants.h
#define CUDA_OR_ROCM_NUM_THREADS 256
// corresponds to aten/src/ATen/cuda/detail/OffsetCalculator.cuh
#define MAX_DIMS 16
#ifndef __forceinline__
#define __forceinline__ inline __attribute__((always_inline))
#endif
#else
//TODO use _assert_fail, because assert is disabled in non-debug builds
#define ERROR_UNSUPPORTED_CAST assert(false);
#define CUDA_OR_ROCM_NUM_THREADS 128
#define MAX_DIMS 25
#endif
#define POS_INFINITY __int_as_float(0x7f800000)
#define INFINITY POS_INFINITY
#define NEG_INFINITY __int_as_float(0xff800000)
#define NAN __int_as_float(0x7fffffff)

typedef long long int int64_t;
typedef unsigned int uint32_t;
typedef signed char int8_t;
typedef unsigned char uint8_t; // NOTE: this MUST be "unsigned char"! "char" is equivalent to "signed char"
typedef short int16_t;
static_assert(sizeof(int64_t) == 8, "expected size does not match");
static_assert(sizeof(uint32_t) == 4, "expected size does not match");
static_assert(sizeof(int8_t) == 1, "expected size does not match");
constexpr int num_threads = CUDA_OR_ROCM_NUM_THREADS;
constexpr int thread_work_size = 4; // TODO: make template substitution once we decide where those vars live
constexpr int block_work_size = thread_work_size * num_threads;

namespace std {

template <class _Tp>
_Tp&& __declval(int);
template <class _Tp>
_Tp __declval(long);
template <class _Tp>
decltype(__declval<_Tp>(0)) declval() noexcept;

template <class _Tp, _Tp __v>
struct integral_constant {
static const _Tp value = __v;
typedef _Tp value_type;
typedef integral_constant type;
};

typedef integral_constant<bool, true> true_type;
typedef integral_constant<bool, false> false_type;

// is_same, functional
template <class _Tp, class _Up> struct is_same : public false_type {};
template <class _Tp> struct is_same<_Tp, _Tp> : public true_type {};

// is_integral, for some types.
template <class _Tp> struct is_integral
: public integral_constant<bool, false> {};
template <> struct is_integral<bool>
: public integral_constant<bool, true> {};
template <> struct is_integral<char>
: public integral_constant<bool, true> {};
template <> struct is_integral<short>
: public integral_constant<bool, true> {};
template <> struct is_integral<int>
: public integral_constant<bool, true> {};
template <> struct is_integral<long>
: public integral_constant<bool, true> {};
template <> struct is_integral<long long>
: public integral_constant<bool, true> {};

// enable_if, functional
template <bool _C, typename _Tp> struct enable_if{};
template <class _Tp> struct enable_if<true, _Tp>{
using type = _Tp;
};
template <bool b, class T=void>
using enable_if_t = typename enable_if<b,T>::type;

template <class _Tp> struct remove_const {typedef _Tp type;};
template <class _Tp> struct remove_const<const _Tp> {typedef _Tp type;};
template <class _Tp> using remove_const_t = typename remove_const<_Tp>::type;

template <class _Tp> struct remove_volatile {typedef _Tp type;};
template <class _Tp> struct remove_volatile<volatile _Tp> {typedef _Tp type;};
template <class _Tp> using remove_volatile_t = typename remove_volatile<_Tp>::type;

template <class _Tp> struct remove_cv
{typedef typename remove_volatile<typename remove_const<_Tp>::type>::type type;};
template <class _Tp> using remove_cv_t = typename remove_cv<_Tp>::type;

template <class _Tp> struct __libcpp_is_floating_point : public false_type {};
template <> struct __libcpp_is_floating_point<float> : public true_type {};
template <> struct __libcpp_is_floating_point<double> : public true_type {};
template <> struct __libcpp_is_floating_point<long double> : public true_type {};

template <class _Tp> struct is_floating_point
: public __libcpp_is_floating_point<typename remove_cv<_Tp>::type> {};

template <class _Tp> struct is_arithmetic
: public integral_constant<bool, is_integral<_Tp>::value ||
is_floating_point<_Tp>::value> {};
template <class _Tp>
inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;

template <class _Tp>
struct __numeric_type
{
static void __test(...);
static float __test(float);
static double __test(char);
static double __test(int);
static double __test(unsigned);
static double __test(long);
static double __test(unsigned long);
static double __test(long long);
static double __test(unsigned long long);
static double __test(double);
static long double __test(long double);

 typedef decltype(__test(declval<_Tp>())) type;
 static const bool value = !is_same<type, void>::value;

};

template <>
struct __numeric_type<void>
{
static const bool value = true;
};

// __promote

template <class _A1, class _A2 = void, class _A3 = void,
bool = __numeric_type<_A1>::value &&
__numeric_type<_A2>::value &&
__numeric_type<_A3>::value>
class __promote_imp
{
public:
static const bool value = false;
};

template <class _A1, class _A2, class _A3>
class __promote_imp<_A1, _A2, _A3, true>
{
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
typedef typename __promote_imp<_A3>::type __type3;
public:
typedef decltype(__type1() + __type2() + __type3()) type;
static const bool value = true;
};

template <class _A1, class _A2>
class __promote_imp<_A1, _A2, void, true>
{
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
public:
typedef decltype(__type1() + __type2()) type;
static const bool value = true;
};

template <class _A1>
class __promote_imp<_A1, void, void, true>
{
public:
typedef typename __numeric_type<_A1>::type type;
static const bool value = true;
};

template <class _A1, class _A2 = void, class _A3 = void>
class __promote : public __promote_imp<_A1, _A2, _A3> {};

} // namespace std

namespace std {

using ::signbit;
using ::isfinite;
using ::isinf;
using ::isnan;

using ::abs;

using ::acos;
using ::acosf;
using ::asin;
using ::asinf;
using ::atan;
using ::atanf;
using ::atan2;
using ::atan2f;
using ::ceil;
using ::ceilf;
using ::cos;
using ::cosf;
using ::cosh;
using ::coshf;

using ::exp;
using ::expf;

using ::fabs;
using ::fabsf;
using ::floor;
using ::floorf;

using ::fmod;
using ::fmodf;

using ::frexp;
using ::frexpf;
using ::ldexp;
using ::ldexpf;

using ::log;
using ::logf;

using ::log10;
using ::log10f;
using ::modf;
using ::modff;

using ::pow;
using ::powf;

using ::sin;
using ::sinf;
using ::sinh;
using ::sinhf;

using ::sqrt;
using ::sqrtf;
using ::tan;
using ::tanf;

using ::tanh;
using ::tanhf;

using ::acosh;
using ::acoshf;
using ::asinh;
using ::asinhf;
using ::atanh;
using ::atanhf;
using ::cbrt;
using ::cbrtf;

using ::copysign;
using ::copysignf;

using ::erf;
using ::erff;
using ::erfc;
using ::erfcf;
using ::exp2;
using ::exp2f;
using ::expm1;
using ::expm1f;
using ::fdim;
using ::fdimf;
using ::fmaf;
using ::fma;
using ::fmax;
using ::fmaxf;
using ::fmin;
using ::fminf;
using ::hypot;
using ::hypotf;
using ::ilogb;
using ::ilogbf;
using ::lgamma;
using ::lgammaf;
using ::llrint;
using ::llrintf;
using ::llround;
using ::llroundf;
using ::log1p;
using ::log1pf;
using ::log2;
using ::log2f;
using ::logb;
using ::logbf;
using ::lrint;
using ::lrintf;
using ::lround;
using ::lroundf;

using ::nan;
using ::nanf;

using ::nearbyint;
using ::nearbyintf;
using ::nextafter;
using ::nextafterf;
using ::remainder;
using ::remainderf;
using ::remquo;
using ::remquof;
using ::rint;
using ::rintf;
using ::round;
using ::roundf;
using ::scalbln;
using ::scalblnf;
using ::scalbn;
using ::scalbnf;
using ::tgamma;
using ::tgammaf;
using ::trunc;
using ::truncf;

} // namespace std

// NB: Order matters for this macro; it is relied upon in
// promoteTypesLookup and the serialization format.
// Note, some types have ctype as void because we don't support them in codegen
#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(_) \
_(uint8_t, Byte) /* 0 */ \
_(int8_t, Char) /* 1 */ \
_(int16_t, Short) /* 2 */ \
_(int, Int) /* 3 */ \
_(int64_t, Long) /* 4 */ \
_(at::Half, Half) /* 5 */ \
_(float, Float) /* 6 */ \
_(double, Double) /* 7 */ \
_(std::complex<at::Half>, ComplexHalf) /* 8 */ \
_(std::complex<float>, ComplexFloat) /* 9 */ \
_(std::complex<double>, ComplexDouble) /* 10 */ \
_(bool, Bool) /* 11 */ \
_(void, QInt8) /* 12 */ \
_(void, QUInt8) /* 13 */ \
_(void, QInt32) /* 14 */ \
_(at::BFloat16, BFloat16) /* 15 */ \

#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_QINT(_) \
_(uint8_t, Byte) \
_(int8_t, Char) \
_(int16_t, Short) \
_(int, Int) \
_(int64_t, Long) \
_(at::Half, Half) \
_(float, Float) \
_(double, Double) \
_(std::complex<at::Half>, ComplexHalf) \
_(std::complex<float>, ComplexFloat) \
_(std::complex<double>, ComplexDouble) \
_(bool, Bool) \
_(at::BFloat16, BFloat16)

enum class ScalarType : int8_t {
#define DEFINE_ENUM(_1, n) n,
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_ENUM)
#undef DEFINE_ENUM
Undefined,
NumOptions
};

template <typename T, int size>
struct Array {
T data[size];

__device__ T operator[](int i) const {
return data[i];
}
__device__ T& operator[](int i) {
return data[i];
}
Array() = default;
Array(const Array&) = default;
Array& operator=(const Array&) = default;
__device__ Array(T x) {
for (int i = 0; i < size; i++) {
data[i] = x;
}
}
};

namespace std {

template<class _Tp> class complex;

template<class _Tp> complex<_Tp> operator*(const complex<_Tp>& __z, const complex<_Tp>& __w);
template<class _Tp> complex<_Tp> operator/(const complex<_Tp>& __x, const complex<_Tp>& __y);

template<class _Tp>
class complex
{
public:
typedef _Tp value_type;
private:
value_type __re_;
value_type __im_;
public:
constexpr
complex(const value_type& __re = value_type(), const value_type& __im = value_type())
: __re_(__re), __im_(__im) {}
template<class _Xp> constexpr
complex(const complex<_Xp>& __c)
: __re_(__c.real()), __im_(__c.imag()) {}

  constexpr value_type real() const {return __re_;}
  constexpr value_type imag() const {return __im_;}

  void real(value_type __re) {__re_ = __re;}
  void imag(value_type __im) {__im_ = __im;}

  constexpr operator bool() const {
      return real() || imag();
  }

  complex& operator= (const value_type& __re)
      {__re_ = __re; __im_ = value_type(); return *this;}
  complex& operator+=(const value_type& __re) {__re_ += __re; return *this;}
  complex& operator-=(const value_type& __re) {__re_ -= __re; return *this;}
  complex& operator*=(const value_type& __re) {__re_ *= __re; __im_ *= __re; return *this;}
  complex& operator/=(const value_type& __re) {__re_ /= __re; __im_ /= __re; return *this;}

  template<class _Xp> complex& operator= (const complex<_Xp>& __c)
      {
          __re_ = __c.real();
          __im_ = __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
      {
          __re_ += __c.real();
          __im_ += __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
      {
          __re_ -= __c.real();
          __im_ -= __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
      {
          *this = *this * complex(__c.real(), __c.imag());
          return *this;
      }
  template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
      {
          *this = *this / complex(__c.real(), __c.imag());
          return *this;
      }

};

template<> class complex<double>;

template<>
class complex<float>
{
float __re_;
float __im_;
public:
typedef float value_type;

  constexpr complex(float __re = 0.0f, float __im = 0.0f)
      : __re_(__re), __im_(__im) {}

  explicit constexpr complex(const complex<double>& __c);

  constexpr float real() const {return __re_;}
  constexpr float imag() const {return __im_;}

  void real(value_type __re) {__re_ = __re;}
  void imag(value_type __im) {__im_ = __im;}

  constexpr operator bool() const {
      return real() || imag();
  }

  complex& operator= (float __re)
      {__re_ = __re; __im_ = value_type(); return *this;}
  complex& operator+=(float __re) {__re_ += __re; return *this;}
  complex& operator-=(float __re) {__re_ -= __re; return *this;}
  complex& operator*=(float __re) {__re_ *= __re; __im_ *= __re; return *this;}
  complex& operator/=(float __re) {__re_ /= __re; __im_ /= __re; return *this;}

  template<class _Xp> complex& operator= (const complex<_Xp>& __c)
      {
          __re_ = __c.real();
          __im_ = __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
      {
          __re_ += __c.real();
          __im_ += __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
      {
          __re_ -= __c.real();
          __im_ -= __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
      {
          *this = *this * complex(__c.real(), __c.imag());
          return *this;
      }
  template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
      {
          *this = *this / complex(__c.real(), __c.imag());
          return *this;
      }

};

template<>
class complex<double>
{
double __re_;
double __im_;
public:
typedef double value_type;

  constexpr complex(double __re = 0.0, double __im = 0.0)
      : __re_(__re), __im_(__im) {}

  constexpr complex(const complex<float>& __c);

  constexpr double real() const {return __re_;}
  constexpr double imag() const {return __im_;}

  void real(value_type __re) {__re_ = __re;}
  void imag(value_type __im) {__im_ = __im;}

  constexpr operator bool() const {
      return real() || imag();
  }

  complex& operator= (double __re)
      {__re_ = __re; __im_ = value_type(); return *this;}
  complex& operator+=(double __re) {__re_ += __re; return *this;}
  complex& operator-=(double __re) {__re_ -= __re; return *this;}
  complex& operator*=(double __re) {__re_ *= __re; __im_ *= __re; return *this;}
  complex& operator/=(double __re) {__re_ /= __re; __im_ /= __re; return *this;}

  template<class _Xp> complex& operator= (const complex<_Xp>& __c)
      {
          __re_ = __c.real();
          __im_ = __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
      {
          __re_ += __c.real();
          __im_ += __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
      {
          __re_ -= __c.real();
          __im_ -= __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
      {
          *this = *this * complex(__c.real(), __c.imag());
          return *this;
      }
  template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
      {
          *this = *this / complex(__c.real(), __c.imag());
          return *this;
      }

};

inline
constexpr
complex<float>::complex(const complex<double>& __c)
: __re_(__c.real()), __im_(__c.imag()) {}

inline
constexpr
complex<double>::complex(const complex<float>& __c)
: __re_(__c.real()), __im_(__c.imag()) {}

// 26.3.6 operators:

template<class _Tp>
inline
complex<_Tp>
operator+(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t += __y;
return __t;
}

template<class _Tp>
inline
complex<_Tp>
operator+(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t += __y;
return __t;
}

template<class _Tp>
inline
complex<_Tp>
operator+(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__y);
__t += __x;
return __t;
}

template<class _Tp>
inline
complex<_Tp>
operator-(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t -= __y;
return __t;
}

template<class _Tp>
inline
complex<_Tp>
operator-(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t -= __y;
return __t;
}

template<class _Tp>
inline
complex<_Tp>
operator-(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(-__y);
__t += __x;
return __t;
}

template<class _Tp>
complex<_Tp>
operator*(const complex<_Tp>& __z, const complex<_Tp>& __w)
{
_Tp __a = __z.real();
_Tp __b = __z.imag();
_Tp __c = __w.real();
_Tp __d = __w.imag();
_Tp __ac = __a * __c;
_Tp __bd = __b * __d;
_Tp __ad = __a * __d;
_Tp __bc = __b * __c;
_Tp __x = __ac - __bd;
_Tp __y = __ad + __bc;
if (isnan(__x) && isnan(__y))
{
bool __recalc = false;
if (isinf(__a) || isinf(__b))
{
__a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
__b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
if (isnan(__c))
__c = copysign(_Tp(0), __c);
if (isnan(__d))
__d = copysign(_Tp(0), __d);
__recalc = true;
}
if (isinf(__c) || isinf(__d))
{
__c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
__d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
if (isnan(__a))
__a = copysign(_Tp(0), __a);
if (isnan(__b))
__b = copysign(_Tp(0), __b);
__recalc = true;
}
if (!__recalc && (isinf(__ac) || isinf(__bd) ||
isinf(__ad) || isinf(__bc)))
{
if (isnan(__a))
__a = copysign(_Tp(0), __a);
if (isnan(__b))
__b = copysign(_Tp(0), __b);
if (isnan(__c))
__c = copysign(_Tp(0), __c);
if (isnan(__d))
__d = copysign(_Tp(0), __d);
__recalc = true;
}
if (__recalc)
{
__x = _Tp(INFINITY) * (__a * __c - __b * __d);
__y = _Tp(INFINITY) * (__a * __d + __b * __c);
}
}
return complex<_Tp>(__x, __y);
}

template<class _Tp>
inline
complex<_Tp>
operator*(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t *= __y;
return __t;
}

template<class _Tp>
inline
complex<_Tp>
operator*(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__y);
__t *= __x;
return __t;
}

template<class _Tp>
complex<_Tp>
operator/(const complex<_Tp>& __z, const complex<_Tp>& __w)
{
int __ilogbw = 0;
_Tp __a = __z.real();
_Tp __b = __z.imag();
_Tp __c = __w.real();
_Tp __d = __w.imag();
_Tp __logbw = logb(fmax(fabs(__c), fabs(__d)));
if (isfinite(__logbw))
{
__ilogbw = static_cast<int>(__logbw);
__c = scalbn(__c, -__ilogbw);
__d = scalbn(__d, -__ilogbw);
}
_Tp __denom = __c * __c + __d * __d;
_Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
_Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
if (isnan(__x) && isnan(__y))
{
if ((__denom == _Tp(0)) && (!isnan(__a) || !isnan(__b)))
{
__x = copysign(_Tp(INFINITY), __c) * __a;
__y = copysign(_Tp(INFINITY), __c) * __b;
}
else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
{
__a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
__b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
__x = _Tp(INFINITY) * (__a * __c + __b * __d);
__y = _Tp(INFINITY) * (__b * __c - __a * __d);
}
else if (isinf(__logbw) && __logbw > _Tp(0) && isfinite(__a) && isfinite(__b))
{
__c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
__d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
__x = _Tp(0) * (__a * __c + __b * __d);
__y = _Tp(0) * (__b * __c - __a * __d);
}
}
return complex<_Tp>(__x, __y);
}

template<class _Tp>
inline
complex<_Tp>
operator/(const complex<_Tp>& __x, const _Tp& __y)
{
return complex<_Tp>(__x.real() / __y, __x.imag() / __y);
}

template<class _Tp>
inline
complex<_Tp>
operator/(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t /= __y;
return __t;
}

template<class _Tp>
inline
complex<_Tp>
operator+(const complex<_Tp>& __x)
{
return __x;
}

template<class _Tp>
inline
complex<_Tp>
operator-(const complex<_Tp>& __x)
{
return complex<_Tp>(-__x.real(), -__x.imag());
}

template<class _Tp>
inline constexpr
bool
operator==(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return __x.real() == __y.real() && __x.imag() == __y.imag();
}

template<class _Tp>
inline constexpr
bool
operator==(const complex<_Tp>& __x, const _Tp& __y)
{
return __x.real() == __y && __x.imag() == 0;
}

template<class _Tp>
inline constexpr
bool
operator==(const _Tp& __x, const complex<_Tp>& __y)
{
return __x == __y.real() && 0 == __y.imag();
}

template<class _Tp>
inline constexpr
bool
operator!=(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return !(__x == __y);
}

template<class _Tp>
inline constexpr
bool
operator!=(const complex<_Tp>& __x, const _Tp& __y)
{
return !(__x == __y);
}

template<class _Tp>
inline constexpr
bool
operator!=(const _Tp& __x, const complex<_Tp>& __y)
{
return !(__x == __y);
}

template<class _Tp>
inline constexpr
bool
operator&&(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return bool(__x) && bool(__y);
}

template<class _Tp>
inline constexpr
bool
isnan(const complex<_Tp>& __x)
{
return isnan(__x.real()) || isnan(__x.imag());
}

template<class _Tp>
inline constexpr
bool
operator||(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return bool(__x) || bool(__y);
}

// 26.3.7 values:

template <class _Tp, bool = is_integral<_Tp>::value,
bool = is_floating_point<_Tp>::value
>
struct __libcpp_complex_overload_traits {};

// Integral Types
template <class _Tp>
struct __libcpp_complex_overload_traits<_Tp, true, false>
{
typedef double _ValueType;
typedef complex<double> _ComplexType;
};

// Floating point types
template <class _Tp>
struct __libcpp_complex_overload_traits<_Tp, false, true>
{
typedef _Tp _ValueType;
typedef complex<_Tp> _ComplexType;
};

// real

template<class _Tp>
inline constexpr
_Tp
real(const complex<_Tp>& __c)
{
return __c.real();
}

template <class _Tp>
inline constexpr
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
real(_Tp __re)
{
return __re;
}

// imag

template<class _Tp>
inline constexpr
_Tp
imag(const complex<_Tp>& __c)
{
return __c.imag();
}

template <class _Tp>
inline constexpr
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
imag(_Tp)
{
return 0;
}

// abs

template<class _Tp>
inline
_Tp
abs(const complex<_Tp>& __c)
{
return hypot(__c.real(), __c.imag());
}

// arg

template<class _Tp>
inline
_Tp
arg(const complex<_Tp>& __c)
{
return atan2(__c.imag(), __c.real());
}

template<class _Tp>
inline
typename enable_if
<
is_integral<_Tp>::value || is_same<_Tp, double>::value,
double
>::type
arg(_Tp __re)
{
return atan2(0., __re);
}

template <class _Tp>
inline
typename enable_if<
is_same<_Tp, float>::value,
float
>::type
arg(_Tp __re)
{
return atan2f(0.F, __re);
}

}

namespace std {

// norm

template<class _Tp>
inline
_Tp
norm(const complex<_Tp>& __c)
{
if (isinf(__c.real()))
return abs(__c.real());
if (isinf(__c.imag()))
return abs(__c.imag());
return __c.real() * __c.real() + __c.imag() * __c.imag();
}

template <class _Tp>
inline
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
norm(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ValueType _ValueType;
return static_cast<_ValueType>(__re) * __re;
}

// conj

template<class _Tp>
inline
complex<_Tp>
conj(const complex<_Tp>& __c)
{
return complex<_Tp>(__c.real(), -__c.imag());
}

template <class _Tp>
inline
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
conj(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ComplexType _ComplexType;
return _ComplexType(__re);
}

// proj

template<class _Tp>
inline
complex<_Tp>
proj(const complex<_Tp>& __c)
{
complex<_Tp> __r = __c;
if (isinf(__c.real()) || isinf(__c.imag()))
__r = complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag()));
return __r;
}

template <class _Tp>
inline
typename enable_if
<
is_floating_point<_Tp>::value,
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
>::type
proj(_Tp __re)
{
if (isinf(__re))
__re = abs(__re);
return complex<_Tp>(__re);
}

template <class _Tp>
inline
typename enable_if
<
is_integral<_Tp>::value,
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
>::type
proj(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ComplexType _ComplexType;
return _ComplexType(__re);
}

// polar

template<class _Tp>
complex<_Tp>
polar(const _Tp& __rho, const _Tp& __theta = _Tp())
{
if (isnan(__rho) || signbit(__rho))
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
if (isnan(__theta))
{
if (isinf(__rho))
return complex<_Tp>(__rho, __theta);
return complex<_Tp>(__theta, __theta);
}
if (isinf(__theta))
{
if (isinf(__rho))
return complex<_Tp>(__rho, _Tp(NAN));
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
}
_Tp __x = __rho * cos(__theta);
if (isnan(__x))
__x = 0;
_Tp __y = __rho * sin(__theta);
if (isnan(__y))
__y = 0;
return complex<_Tp>(__x, __y);
}

// log

template<class _Tp>
inline
complex<_Tp>
log(const complex<_Tp>& __x)
{
return complex<_Tp>(log(abs(__x)), arg(__x));
}

// log10

template<class _Tp>
inline
complex<_Tp>
log10(const complex<_Tp>& __x)
{
return log(__x) / log(_Tp(10));
}

// log2

template<class _Tp>
inline
complex<_Tp>
log2(const complex<_Tp>& __x)
{
return log(__x) / log(_Tp(2));
}

// sqrt

template<class _Tp>
complex<_Tp>
sqrt(const complex<_Tp>& __x)
{
if (isinf(__x.imag()))
return complex<_Tp>(_Tp(INFINITY), __x.imag());
if (isinf(__x.real()))
{
if (__x.real() > _Tp(0))
return complex<_Tp>(__x.real(), isnan(__x.imag()) ? __x.imag() : copysign(_Tp(0), __x.imag()));
return complex<_Tp>(isnan(__x.imag()) ? __x.imag() : _Tp(0), copysign(__x.real(), __x.imag()));
}
return polar(sqrt(abs(__x)), arg(__x) / _Tp(2));
}

// exp

template<class _Tp>
complex<_Tp>
exp(const complex<_Tp>& __x)
{
_Tp __i = __x.imag();
if (__i == 0) {
return complex<_Tp>(exp(__x.real()), copysign(_Tp(0), __x.imag()));
}
if (isinf(__x.real()))
{
if (__x.real() < _Tp(0))
{
if (!isfinite(__i))
__i = _Tp(1);
}
else if (__i == 0 || !isfinite(__i))
{
if (isinf(__i))
__i = _Tp(NAN);
return complex<_Tp>(__x.real(), __i);
}
}
_Tp __e = exp(__x.real());
return complex<_Tp>(__e * cos(__i), __e * sin(__i));
}

// pow

template<class _Tp>
inline
complex<_Tp>
pow(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return exp(__y * log(__x));
}

template<class _Tp, class _Up>
inline
complex<typename __promote<_Tp, _Up>::type>
pow(const complex<_Tp>& __x, const complex<_Up>& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return std::pow(result_type(__x), result_type(__y));
}

template<class _Tp, class _Up>
inline
typename enable_if
<
is_arithmetic<_Up>::value,
complex<typename __promote<_Tp, _Up>::type>
>::type
pow(const complex<_Tp>& __x, const _Up& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return std::pow(result_type(__x), result_type(__y));
}

template<class _Tp, class _Up>
inline
typename enable_if
<
is_arithmetic<_Tp>::value,
complex<typename __promote<_Tp, _Up>::type>
>::type
pow(const _Tp& __x, const complex<_Up>& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return std::pow(result_type(__x), result_type(__y));
}

// __sqr, computes pow(x, 2)

template<class _Tp>
inline
complex<_Tp>
__sqr(const complex<_Tp>& __x)
{
return complex<_Tp>((__x.real() - __x.imag()) * (__x.real() + __x.imag()),
_Tp(2) * __x.real() * __x.imag());
}

// asinh

template<class _Tp>
complex<_Tp>
asinh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return __x;
if (isinf(__x.imag()))
return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (__x.imag() == 0)
return __x;
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(copysign(__x.imag(), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) + _Tp(1)));
return complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag()));
}

// acosh

template<class _Tp>
complex<_Tp>
acosh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return complex<_Tp>(abs(__x.real()), __x.imag());
if (isinf(__x.imag()))
{
if (__x.real() > 0)
return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
else
return complex<_Tp>(-__x.real(), copysign(__pi * _Tp(0.75), __x.imag()));
}
if (__x.real() < 0)
return complex<_Tp>(-__x.real(), copysign(__pi, __x.imag()));
return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(abs(__x.imag()), __x.real());
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(abs(__x.imag()), copysign(__pi/_Tp(2), __x.imag()));
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
return complex<_Tp>(copysign(__z.real(), _Tp(0)), copysign(__z.imag(), __x.imag()));
}

// atanh

template<class _Tp>
complex<_Tp>
atanh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.imag()))
{
return complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
}
if (isnan(__x.imag()))
{
if (isinf(__x.real()) || __x.real() == 0)
return complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag());
return complex<_Tp>(__x.imag(), __x.imag());
}
if (isnan(__x.real()))
{
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.real()))
{
return complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
}
if (abs(__x.real()) == _Tp(1) && __x.imag() == _Tp(0))
{
return complex<_Tp>(copysign(_Tp(INFINITY), __x.real()), copysign(_Tp(0), __x.imag()));
}
complex<_Tp> __z = log((_Tp(1) + __x) / (_Tp(1) - __x)) / _Tp(2);
return complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag()));
}

// sinh

template<class _Tp>
complex<_Tp>
sinh(const complex<_Tp>& __x)
{
if (isinf(__x.real()) && !isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.real() == 0 && !isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.imag() == 0 && !isfinite(__x.real()))
return __x;
return complex<_Tp>(sinh(__x.real()) * cos(__x.imag()), cosh(__x.real()) * sin(__x.imag()));
}

// cosh

template<class _Tp>
complex<_Tp>
cosh(const complex<_Tp>& __x)
{
if (isinf(__x.real()) && !isfinite(__x.imag()))
return complex<_Tp>(abs(__x.real()), _Tp(NAN));
if (__x.real() == 0 && !isfinite(__x.imag()))
return complex<_Tp>(_Tp(NAN), __x.real());
if (__x.real() == 0 && __x.imag() == 0)
return complex<_Tp>(_Tp(1), __x.imag());
if (__x.imag() == 0 && !isfinite(__x.real()))
return complex<_Tp>(abs(__x.real()), __x.imag());
return complex<_Tp>(cosh(__x.real()) * cos(__x.imag()), sinh(__x.real()) * sin(__x.imag()));
}

// tanh

template<class _Tp>
complex<_Tp>
tanh(const complex<_Tp>& __x)
{
if (isinf(__x.real()))
{
if (!isfinite(__x.imag()))
return complex<_Tp>(copysign(_Tp(1), __x.real()), _Tp(0));
return complex<_Tp>(copysign(_Tp(1), __x.real()), copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
}
if (isnan(__x.real()) && __x.imag() == 0)
return __x;
_Tp __2r(_Tp(2) * __x.real());
_Tp __2i(_Tp(2) * __x.imag());
_Tp __d(cosh(__2r) + cos(__2i));
_Tp __2rsh(sinh(__2r));
if (isinf(__2rsh) && isinf(__d))
return complex<_Tp>(__2rsh > _Tp(0) ? _Tp(1) : _Tp(-1),
__2i > _Tp(0) ? _Tp(0) : _Tp(-0.));
return complex<_Tp>(__2rsh/__d, sin(__2i)/__d);
}

// asin

template<class _Tp>
complex<_Tp>
asin(const complex<_Tp>& __x)
{
complex<_Tp> __z = asinh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}

// acos

template<class _Tp>
complex<_Tp>
acos(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (isinf(__x.imag()))
{
if (__x.real() < _Tp(0))
return complex<_Tp>(_Tp(0.75) * __pi, -__x.imag());
return complex<_Tp>(_Tp(0.25) * __pi, -__x.imag());
}
if (__x.real() < _Tp(0))
return complex<_Tp>(__pi, signbit(__x.imag()) ? -__x.real() : __x.real());
return complex<_Tp>(_Tp(0), signbit(__x.imag()) ? __x.real() : -__x.real());
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(__x.real(), -__x.imag());
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(__pi/_Tp(2), -__x.imag());
if (__x.real() == 0 && (__x.imag() == 0 || isnan(__x.imag())))
return complex<_Tp>(__pi/_Tp(2), -__x.imag());
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
if (signbit(__x.imag()))
return complex<_Tp>(abs(__z.imag()), abs(__z.real()));
return complex<_Tp>(abs(__z.imag()), -abs(__z.real()));
}

// atan

template<class _Tp>
complex<_Tp>
atan(const complex<_Tp>& __x)
{
complex<_Tp> __z = atanh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}

// sin

template<class _Tp>
complex<_Tp>
sin(const complex<_Tp>& __x)
{
complex<_Tp> __z = sinh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}

// cos

template<class _Tp>
inline
complex<_Tp>
cos(const complex<_Tp>& __x)
{
return cosh(complex<_Tp>(-__x.imag(), __x.real()));
}

// tan

template<class _Tp>
complex<_Tp>
tan(const complex<_Tp>& __x)
{
complex<_Tp> __z = tanh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}

// Literal suffix for complex number literals [complex.literals]
inline namespace literals
{
inline namespace complex_literals
{
constexpr complex<double> operator""i(long double __im)
{
return { 0.0, static_cast<double>(__im) };
}

  constexpr complex<double> operator""i(unsigned long long __im)
  {
      return { 0.0, static_cast<double>(__im) };
  }


  constexpr complex<float> operator""if(long double __im)
  {
      return { 0.0f, static_cast<float>(__im) };
  }

  constexpr complex<float> operator""if(unsigned long long __im)
  {
      return { 0.0f, static_cast<float>(__im) };
  }
} // namespace complex_literals

} // namespace literals

} // namespace std

namespace c10 {
  template <typename T>
  struct LoadImpl {
    __device__ static T apply(const void *src) {
      return *reinterpret_cast<const T*>(src);
    }
  };

  template <>
  struct LoadImpl<bool> {
    __device__ static bool apply(const void *src) {
      static_assert(sizeof(bool) == sizeof(char), "");
      return LoadImpl<char>::apply(src);
    }
  };

  template <typename T>
  __device__ T load(const void *src) {
    return LoadImpl<T>::apply(src);
  }

  template <typename scalar_t>
  __device__ scalar_t load(const scalar_t *src) {
    return LoadImpl<scalar_t>::apply(src);
  }
}  // namespace c10

template <typename scalar_t>
__device__ __inline__ scalar_t load(char* base_ptr, uint32_t offset) {
return c10::load(reinterpret_cast<scalar_t*>(base_ptr) + offset);
}

template <typename scalar_t>
__device__ __inline__ void store(scalar_t value, char *base_ptr, uint32_t offset) {
*(reinterpret_cast<scalar_t *>(base_ptr) + offset) = value;
}

// aligned vector generates vectorized load/store on CUDA
template<typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector {
scalar_t val[vec_size];
};

template <int vec_size, typename scalar_t>
__device__ aligned_vector<scalar_t, vec_size> load_vector(const scalar_t *base_ptr, uint32_t offset) {
using vec_t = aligned_vector<scalar_t, vec_size>;
auto *from = reinterpret_cast<const vec_t *>(base_ptr);
return from[offset];
}

template <int vec_size>
__device__ aligned_vector<bool, vec_size> load_vector(const bool *base_ptr, uint32_t offset) {
// See NOTE [Loading boolean values]
auto tmp = load_vector<vec_size>(reinterpret_cast<const uint8_t*>(base_ptr), offset);
aligned_vector<bool, vec_size> ret;
for (int i = 0; i < vec_size; ++i) {
ret.val[i] = bool(tmp.val[i]);
}
return ret;
}

template <typename T> T abs_kernel(T x) { return std::abs(x); }

// TODO: setup grid-stride loop

extern "C" __global__
void abs_kernel_vectorized4_kernel(
const int N,
Array<char*, 1+1> data,
std::complex<float> scalar_val) //[1+1],
{
constexpr int vec_size = 4;
using scalar_t = std::complex<float>;
int remaining = N - block_work_size * blockIdx.x;
int thread_idx = threadIdx.x;
int idx = blockIdx.x;
std::complex<float> arg0[4];

  std::complex<float> out0[4];


  if (remaining < block_work_size) {
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++){
      if (thread_idx >= remaining) {
        break;
      }
      int linear_idx = thread_idx + block_work_size * idx;
      arg0[j] = load<std::complex<float>>(data[1], linear_idx);

      thread_idx += num_threads;
    }
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
      if ((threadIdx.x  + j*num_threads) < remaining) {
        out0[j] = abs_kernel<std::complex<float>>(arg0[j] );
      }
    }
    thread_idx = threadIdx.x;
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
      if (thread_idx >= remaining) {
          break;
      }
      int linear_idx = thread_idx + block_work_size * idx;
      store<std::complex<float>>(out0[j], data[0], linear_idx);

      thread_idx += num_threads;
    }
  } else {
    static constexpr int loop_size = thread_work_size / vec_size;

//actual loading
auto * input0 = reinterpret_cast<const scalar_t*>(data[0+1]) + block_work_size * idx;

    #pragma unroll
    for (int i = 0; i<loop_size; i++){
      const auto vec0 = load_vector<vec_size>(input0, thread_idx);
      #pragma unroll
      for (int j=0; j < vec_size; j++){
        arg0[vec_size * i + j] = vec0.val[j];
      }

      thread_idx += num_threads;
    }

    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
      out0[j] = abs_kernel<std::complex<float>>(arg0[j] );
    }

    using vec_t_output = aligned_vector<std::complex<float>, vec_size>;
    vec_t_output* to_0 = reinterpret_cast<vec_t_output*>(data[0]) + block_work_size / vec_size * idx;

    int thread_idx = threadIdx.x;
    #pragma unroll
    for (int i = 0; i<loop_size; i++){
      vec_t_output v;
      #pragma unroll
      for (int j=0; j<vec_size; j++){
      v.val[j] = out0[vec_size * i + j];
      }
      to_0[thread_idx] = v;

      thread_idx += num_threads;
    }
  }

}
nvrtc: error: invalid value for --gpu-architecture (-arch)

生成流程结束,资源已尝试清理。
Traceback (most recent call last):
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio\queueing.py", line 527, in process_events
response = await route_utils.call_process_api(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio\route_utils.py", line 261, in call_process_api
output = await app.get_blocks().process_api(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio\blocks.py", line 1795, in process_api
data = await self.postprocess_data(fn_index, result["prediction"], state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio\blocks.py", line 1625, in postprocess_data
outputs_cached = await processing_utils.async_move_files_to_cache(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio\processing_utils.py", line 410, in async_move_files_to_cache
return await client_utils.async_traverse(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio_client\utils.py", line 1002, in async_traverse
return await func(json_obj)
^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio\processing_utils.py", line 384, in _move_to_cache
temp_file_path = await block.async_move_resource_to_block_cache(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio\blocks.py", line 272, in async_move_resource_to_block_cache
url_or_file_path = str(utils.abspath(url_or_file_path))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\site-packages\gradio\utils.py", line 1007, in abspath
if is_symlink or path == path.resolve(): # in case path couldn't be resolved
^^^^^^^^^^^^^^
File "L:\SongGeneration\installer_files\env\Lib\pathlib.py", line 1003, in resolve
p.stat()
File "L:\SongGeneration\installer_files\env\Lib\pathlib.py", line 1013, in stat
return os.stat(self, follow_symlinks=follow_symlinks)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

我使用Windows11 22h2,系统本身应该支持长文件名,参考音频路径为:L:\SongGeneration\1.wav

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions