
Commit 3efe0b8

Merge branch 'main' into etiotto.add_is_xpu_xxx
2 parents 774810a + 8bbfd21

File tree: 34 files changed (+138, -160 lines)


.github/WINDOWS.md

Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@ Install Microsoft Visual Studio 2022 and make sure the following [components](ht
 
 ### Intel® Deep Learning Essentials
 
-Install [Intel® Deep Learning Essentials 2025.2.1](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?packages=dl-essentials&dl-essentials-os=windows&dl-win=offline).
+Install [Intel® Deep Learning Essentials 2025.3.1](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?packages=dl-essentials&dl-essentials-os=windows&dl-win=offline).
 By default, it is installed to `C:\Program Files (x86)\Intel\oneAPI`.
 
 ### Chocolatey

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-1e69f40b3c03492eb3dd7e03462a5566f29674d3
+549347d24e9b509b653a350053d56992fc8436ad

.github/pins/pytorch.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-01f94d4096060597e2815efe385255ac19c9c787
+365a6c84db516f244b7234b7aa3c8843af52936b

.github/workflows/build-test-reusable.yml

Lines changed: 8 additions & 9 deletions

@@ -194,6 +194,7 @@ jobs:
     matrix:
       suite:
         - minicore
+        - language
         - scaled_dot
         - gluon
         - triton-kernels
@@ -303,6 +304,11 @@ jobs:
         run: |
           ${{ env.TRITON_TEST_CMD }} --minicore
 
+      - name: Run language tests
+        if: matrix.suite == 'language'
+        run: |
+          ${{ env.TRITON_TEST_CMD }} --language
+
       - name: Run mxfp tests
         if: matrix.suite == 'rest'
         run: |
@@ -329,14 +335,6 @@
           export PYTEST_MAX_PROCESSES=4
           ${{ env.TRITON_TEST_CMD }} --triton-kernels
 
-      # FIXME: make sure new tutorials are added to one of the groups (scaled_dot, rest, tutorial-faX)
-      - name: Select tutorials to run (scaled_dot)
-        if: matrix.suite == 'scaled_dot'
-        run: |
-          cat <<EOF | tee tutorials.txt
-          09-persistent-matmul
-          EOF
-
       - name: Select tutorials to run (rest)
         if: matrix.suite == 'rest'
         run: |
@@ -348,11 +346,12 @@
           05-layer-norm
           07-extern-functions
           08-grouped-gemm
+          09-persistent-matmul
           10-experimental-block-pointer
           EOF
 
       - name: Run Tutorials
-        if: matrix.suite == 'scaled_dot' || matrix.suite == 'rest'
+        if: matrix.suite == 'rest'
         run: |
           ${{ env.TRITON_TEST_CMD }} --select-from-file tutorials.txt --tutorial

.github/workflows/pip-test-windows.yml

Lines changed: 1 addition & 1 deletion

@@ -90,7 +90,7 @@ jobs:
           .venv\Scripts\activate.ps1
           Invoke-BatchFile "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
           cd ${{ env.NEW_WORKSPACE }}
-          pip install -U pybind11 'cmake>=3.20,<4.0' intel-sycl-rt==2025.2.1 build
+          pip install -U pybind11 'cmake>=3.20,<4.0' intel-sycl-rt==2025.3.1 build
           # `build` can't determine that Ninja is already installed.
           # similar issue: https://github.com/pypa/build/issues/506
           python -m build --wheel --no-isolation --skip-dependency-check

.github/workflows/pip-test.yml

Lines changed: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ jobs:
       - name: Install runtime dependencies
         run: |
           curl -sSLO --retry 10 https://raw.githubusercontent.com/pytorch/pytorch/$(<.github/pins/pytorch.txt)/.github/scripts/generate_binary_build_matrix.py
-          sed -i '/^validate_nccl_dep_consistency.*/d' generate_binary_build_matrix.py
+          sed -i 's/^\(\s*\)validate_nccl_dep_consistency(arch_version)/\1pass/' generate_binary_build_matrix.py
           python -c "from generate_binary_build_matrix import PYTORCH_EXTRA_INSTALL_REQUIREMENTS; print('\n'.join(PYTORCH_EXTRA_INSTALL_REQUIREMENTS['xpu'].split(' | ')))" | tee /tmp/requirements.txt
           pip install -r /tmp/requirements.txt
           pip install transformers==4.54.0
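
The updated `sed` expression captures the call's leading whitespace and replaces `validate_nccl_dep_consistency(arch_version)` with `pass` at the same indentation, instead of deleting any line where the call starts at column 0. A minimal Python sketch of that substitution follows; the sample `script` text is invented for illustration and is not the real `generate_binary_build_matrix.py`, but it shows why swapping in `pass` keeps an indented block syntactically valid where outright deletion could leave an empty block.

```python
import re

# Hypothetical stand-in for generate_binary_build_matrix.py content.
script = """\
def generate(arch_version):
    if arch_version:
        validate_nccl_dep_consistency(arch_version)
    return arch_version
"""

# Python equivalent of:
#   sed -i 's/^\(\s*\)validate_nccl_dep_consistency(arch_version)/\1pass/'
patched = re.sub(
    r"^(\s*)validate_nccl_dep_consistency\(arch_version\)",
    r"\1pass",
    script,
    flags=re.MULTILINE,
)

# Replacing the call with `pass` keeps the module parseable; deleting the
# line would leave the `if` block empty and raise an IndentationError.
compile(patched, "generate_binary_build_matrix.py", "exec")
print(patched)
```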

lib/Conversion/TritonGPUToLLVM/FuncOpToLLVM.cpp

Lines changed: 14 additions & 0 deletions

@@ -179,11 +179,25 @@ struct FuncOpConversion : public ConvertOpToLLVMPattern<triton::FuncOp> {
             "ttg.total-num-warps"))
       numWarps = totalNumWarps.getInt();
 
+    int numCTAs = 1;
+    if (auto module = funcOp->getParentOfType<ModuleOp>()) {
+      if (auto moduleAttr =
+              module->getAttrOfType<IntegerAttr>(triton::gpu::AttrNumCTAsName))
+        numCTAs = moduleAttr.getInt();
+    }
+
     // Set `nvvm.maxnreg` if it was specified on the module.
     if (Attribute maxnregAttr =
             funcOp.getParentOp()->getAttr(triton::gpu::AttrMaxRegistersName))
       newFuncOp->setAttr(NVVM::NVVMDialect::getMaxnregAttrName(), maxnregAttr);
 
+    // Do we want to do this for nCTAs == 1 whenever sm >= 90?
+    if (numCTAs > 1) {
+      // Request a specific number of CTAs per cluster in the generated PTX.
+      newFuncOp->setAttr(NVVM::NVVMDialect::getClusterDimAttrName(),
+                         rewriter.getDenseI32ArrayAttr(numCTAs));
+    }
+
     // Set an attribute for reqntidx, it could be used in latter LLVM codegen
     // for `nvvm.annotation` metadata.
     newFuncOp->setAttr(NVVM::NVVMDialect::getReqntidAttrName(),

python/triton/compiler/compiler.py

Lines changed: 0 additions & 13 deletions

@@ -297,18 +297,6 @@ def compile(src, target=None, options=None, _env_vars=None):
 
     metadata["cache_dir"] = fn_cache_manager.cache_dir
     metadata["triton_version"] = __version__
-    cluster_dims = getattr(options, "cluster_dims", None)
-    if cluster_dims is None:
-        num_ctas = getattr(options, "num_ctas", None)
-        if num_ctas is None:
-            num_ctas = 1
-        cluster_dims = (num_ctas, 1, 1)
-    if not isinstance(cluster_dims, (list, tuple)):
-        cluster_dims = (cluster_dims, )
-    cluster_dims = tuple(cluster_dims)
-    if len(cluster_dims) < 3:
-        cluster_dims = cluster_dims + (1, ) * (3 - len(cluster_dims))
-    metadata["cluster_dims"] = cluster_dims
     # run compilation pipeline and populate metadata
     stages = dict()
     backend.add_stages(stages, options, src.language)
@@ -435,7 +423,6 @@ def __init__(self, src, metadata_group, hash):
         from collections import namedtuple
         metadata_path = next((Path(p) for c, p in metadata_group.items() if c.endswith(".json")))
         metadata = json.loads(metadata_path.read_text())
-        metadata['cluster_dims'] = tuple(metadata['cluster_dims'])
         # JSON serialization dumps the target as a dict. Restore it to a GPUTarget.
         target = metadata['target']
         metadata['target'] = GPUTarget(target['backend'], target['arch'], target['warp_size'])
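
The deleted block normalized `options.cluster_dims` / `options.num_ctas` into a three-element tuple before storing it in `metadata["cluster_dims"]`; with this commit, `compile()` no longer records that field, and the cache-load path no longer converts it back to a tuple. For reference, the removed normalization is equivalent to the sketch below; `normalize_cluster_dims` is a hypothetical helper written for illustration, not part of the Triton API.

```python
from typing import Optional, Sequence, Tuple, Union


def normalize_cluster_dims(cluster_dims: Optional[Union[int, Sequence[int]]],
                           num_ctas: Optional[int]) -> Tuple[int, ...]:
    """Sketch of the normalization the removed compile() code performed."""
    if cluster_dims is None:
        # Default to a 1D cluster of `num_ctas` CTAs (1 when unspecified).
        cluster_dims = (num_ctas if num_ctas is not None else 1, 1, 1)
    if not isinstance(cluster_dims, (list, tuple)):
        cluster_dims = (cluster_dims, )
    cluster_dims = tuple(cluster_dims)
    # Pad to three dimensions, e.g. (2,) -> (2, 1, 1).
    return cluster_dims + (1, ) * (3 - len(cluster_dims))


assert normalize_cluster_dims(None, None) == (1, 1, 1)
assert normalize_cluster_dims(None, 2) == (2, 1, 1)
assert normalize_cluster_dims(4, None) == (4, 1, 1)
assert normalize_cluster_dims((2, 2), None) == (2, 2, 1)
```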

scripts/skiplist/a770/interpreter.txt

Whitespace-only changes.

scripts/skiplist/a770/tools.txt

Whitespace-only changes.

0 commit comments
