|
4 | 4 | # This source code is licensed under the BSD-style license found in the |
5 | 5 | # LICENSE file in the root directory of this source tree. |
6 | 6 |
|
| 7 | +from typing import Optional |
| 8 | + |
7 | 9 | import executorch.backends.vulkan.patterns as vk_patterns |
8 | 10 | import torch.library |
9 | 11 |
|
@@ -321,6 +323,134 @@ def linear_qta8a_qga4w( |
321 | 323 | lib.impl(name, linear_qta8a_qga4w, "CompositeExplicitAutograd") |
322 | 324 | linear_qta8a_qga4w_op = getattr(getattr(torch.ops, namespace), name) |
323 | 325 |
|
#######################
## linear_q8ta_q8csw ##
#######################
| 329 | + |
| 330 | + |
def linear_q8ta_q8csw(
    x: torch.Tensor,
    input_scale: float,
    input_zero_point: int,
    qweights: torch.Tensor,
    weight_sums: torch.Tensor,
    weight_scales: torch.Tensor,
    bias: Optional[torch.Tensor] = None,
):
    """Reference implementation of the ``linear_q8ta_q8csw`` custom op.

    Dequantizes the per-channel symmetric int8 weights and applies a plain
    float linear layer. ``input_scale``, ``input_zero_point`` and
    ``weight_sums`` are part of the op schema for the Vulkan backend but are
    not used by this composite reference implementation.

    Args:
        x: float activation tensor.
        input_scale: per-tensor activation scale (unused here).
        input_zero_point: per-tensor activation zero point (unused here).
        qweights: int8 weights packed as (in_features, out_features).
        weight_sums: precomputed per-channel weight sums (unused here).
        weight_scales: per-output-channel dequantization scales.
        bias: optional float bias added to the output.

    Returns:
        The float result of ``x @ W.T (+ bias)``.
    """
    # Symmetric quantization: every channel's zero point is zero.
    zero_points = torch.zeros_like(weight_scales, dtype=torch.int32)

    # Weights are stored transposed; F.linear expects (out_features, in_features).
    dequant_weights = torch.ops.quantized_decomposed.dequantize_per_channel(
        qweights.transpose(0, 1),
        weight_scales,
        zero_points,
        0,
        -127,
        127,
        torch.int8,
    )

    # F.linear handles the optional bias (no-op when bias is None).
    return torch.nn.functional.linear(x, dequant_weights, bias)
| 358 | + |
| 359 | + |
name = "linear_q8ta_q8csw"
# Register the schema and the composite reference implementation, then grab a
# handle to the resulting torch.ops entry point.
lib.define(
    f"""
    {name}(
        Tensor x,
        float input_scale,
        int input_zero_point,
        Tensor qweight,
        Tensor weight_sums,
        Tensor weight_scales,
        Tensor? bias = None) -> Tensor
    """
)
lib.impl(name, linear_q8ta_q8csw, "CompositeExplicitAutograd")
ops_ns = getattr(torch.ops, namespace)
qa_q8csw_linear = getattr(ops_ns, name)
| 375 | + |
#######################
## conv2d_q8ta_q8csw ##
#######################
| 379 | + |
| 380 | + |
def conv2d_q8ta_q8csw(
    x: torch.Tensor,
    input_scale: float,
    input_zero_point: int,
    qweights: torch.Tensor,
    weight_sums: torch.Tensor,
    weight_scales: torch.Tensor,
    bias: Optional[torch.Tensor],
    kernel_size: list,
    stride: list,
    padding: list,
    dilation: list,
    groups: int,
):
    """Reference implementation of the ``conv2d_q8ta_q8csw`` custom op.

    Unpacks int8 weights stored in 2D packed layout (IC * H * W, OC) back to
    the 4D conv layout (OC, IC, H, W), dequantizes them per output channel
    (symmetric, zero point 0), and runs a plain float conv2d.
    ``input_scale``, ``input_zero_point`` and ``weight_sums`` are part of the
    op schema for the Vulkan backend but are not used by this composite
    reference implementation.

    Args:
        x: float activation tensor in NCHW layout.
        input_scale: per-tensor activation scale (unused here).
        input_zero_point: per-tensor activation zero point (unused here).
        qweights: int8 weights packed as (IC * H * W, OC).
        weight_sums: precomputed per-channel weight sums (unused here).
        weight_scales: per-output-channel dequantization scales.
        bias: optional float bias.
        kernel_size: [H, W] of the conv kernel.
        stride, padding, dilation: standard conv2d parameters.
        groups: number of convolution groups.

    Returns:
        The float conv2d output.

    Raises:
        ValueError: if the packed weight size is not divisible by H * W.
    """
    # Symmetric quantization: every channel's zero point is zero.
    weight_zeros = torch.zeros_like(weight_scales, dtype=torch.int32)

    # Restore weight tensor from 2D format (IC * H * W, OC) back to 4D format
    # (OC, IC, H, W). First transpose to get (OC, IC * H * W).
    qweights_transposed = qweights.transpose(0, 1)

    H, W = kernel_size[0], kernel_size[1]
    OC = qweights_transposed.shape[0]
    IC_H_W = qweights_transposed.shape[1]
    if IC_H_W % (H * W) != 0:
        raise ValueError(
            f"Packed weight dim {IC_H_W} is not divisible by kernel area {H * W}"
        )
    IC = IC_H_W // (H * W)

    # Reshape to original 4D format (OC, IC, H, W)
    qweights_4d = qweights_transposed.view(OC, IC, H, W)

    # Dequantize weights (axis 0 = per output channel).
    weights = torch.ops.quantized_decomposed.dequantize_per_channel(
        qweights_4d,
        weight_scales,
        weight_zeros,
        0,
        -127,
        127,
        torch.int8,
    )

    # Perform the convolution in float.
    return torch.nn.functional.conv2d(
        x, weights, bias, stride, padding, dilation, groups
    )
| 432 | + |
| 433 | + |
name = "conv2d_q8ta_q8csw"
# Register the schema and the composite reference implementation, then grab a
# handle to the resulting torch.ops entry point.
lib.define(
    f"""
    {name}(
        Tensor x,
        float input_scale,
        int input_zero_point,
        Tensor qweight,
        Tensor weight_sums,
        Tensor weight_scales,
        Tensor? bias,
        SymInt[] kernel_size,
        SymInt[] stride,
        SymInt[] padding,
        SymInt[] dilation,
        SymInt groups) -> Tensor
    """
)
lib.impl(name, conv2d_q8ta_q8csw, "CompositeExplicitAutograd")
conv_ops_ns = getattr(torch.ops, namespace)
conv2d_q8ta_q8csw_op = getattr(conv_ops_ns, name)
324 | 454 | ###################### |
325 | 455 | ## apply_rotary_emb ## |
326 | 456 | ###################### |
|
0 commit comments