Skip to content

Commit 0994765

Browse files
committed
Revert "[MLIR][XeGPU] Support order attribute and add pattern for vector.transpose in WgToSg Pass (llvm#165307)"
This reverts commit f291f33.
1 parent f60e693 commit 0994765

File tree

9 files changed

+286
-387
lines changed

9 files changed

+286
-387
lines changed

mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp

Lines changed: 15 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -280,82 +280,27 @@ LayoutAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
280280
FailureOr<SmallVector<Value>>
281281
LayoutAttr::delinearizeId(OpBuilder &builder, Location loc, Value linearId) {
282282

283-
SmallVector<int64_t> sgLayoutInt;
283+
// TODO: handle order attribute
284+
auto hasDefaultOrder = [&]() {
285+
DenseI32ArrayAttr order = getOrder();
286+
return !order || isIdentityPermutation(llvm::to_vector_of<int64_t>(
287+
llvm::reverse(order.asArrayRef())));
288+
};
289+
if (!hasDefaultOrder())
290+
return mlir::emitError(loc, "order attribute is currently not supported.");
291+
SmallVector<int64_t> layout;
284292
if (isForWorkgroup()) {
285-
sgLayoutInt = getEffectiveSgLayoutAsInt();
293+
layout = getEffectiveSgLayoutAsInt();
286294
} else if (isForSubgroup()) {
287-
sgLayoutInt = getEffectiveLaneLayoutAsInt();
288-
} else {
289-
return failure();
290-
}
291-
292-
DenseI32ArrayAttr orderAttr = getOrder();
293-
294-
// Handle order attribute
295-
SmallVector<int64_t> order;
296-
if (orderAttr && !orderAttr.empty()) {
297-
order = llvm::to_vector(
298-
llvm::map_range(orderAttr.asArrayRef(),
299-
[](int32_t idx) { return static_cast<int64_t>(idx); }));
295+
layout = getEffectiveLaneLayoutAsInt();
300296
} else {
301-
// Default order: [1, 0] for 2D (row-major), [2, 1, 0] for 3D, etc.
302-
order = llvm::to_vector(
303-
llvm::reverse(llvm::seq<int64_t>(0, sgLayoutInt.size())));
304-
}
305-
306-
if (order.size() != sgLayoutInt.size()) {
307297
return failure();
308298
}
299+
auto dims = llvm::map_to_vector(layout, [&](int64_t d) -> Value {
300+
return builder.createOrFold<arith::ConstantIndexOp>(loc, d);
301+
});
309302

310-
SmallVector<Value> result(sgLayoutInt.size());
311-
Value remaining = linearId;
312-
313-
/// Process dimensions in the order they appear in the order array
314-
/// The first dimension in order is the fastest-changing
315-
///
316-
/// Example walkthrough for linearId=22, sgLayout=[2,4,4], order=[2,1,0]:
317-
///
318-
/// Initial: remaining=22, dimIdx = order[i], dimSize = sgLayout[dimIdx],
319-
/// result=[?,?,?]
320-
///
321-
/// i=0 (process columns, dimIdx=2, dimSize=4):
322-
/// result[2] = 22 % 4 = 2 (column coordinate)
323-
/// remaining = 22 / 4 = 5 (5 complete groups of 4 columns processed)
324-
///
325-
/// i=1 (process rows, dimIdx=1, dimSize=4):
326-
/// result[1] = 5 % 4 = 1 (row coordinate)
327-
/// remaining = 5 / 4 = 1 (1 complete group of 4 rows processed)
328-
///
329-
/// i=2 (process layers, dimIdx=0, dimSize=2):
330-
/// result[0] = 1 % 2 = 1 (layer coordinate)
331-
/// (no remaining update - last iteration)
332-
///
333-
/// Final result: [1,1,2] = Layer 1, Row 1, Column 2
334-
for (size_t i = 0; i < order.size(); ++i) {
335-
int64_t dimIdx = order[i];
336-
int64_t dimSize = sgLayoutInt[dimIdx];
337-
338-
Value dimSizeVal =
339-
builder.createOrFold<arith::ConstantIndexOp>(loc, dimSize);
340-
341-
/// Extract the coordinate for this dimension using modulo operation
342-
/// This gives us "how far within this dimension" we are
343-
/// e.g., linearId=22, dimSize=4: 22 % 4 = 2 (we're at position 2 within
344-
/// this dimension)
345-
result[dimIdx] =
346-
builder.createOrFold<index::RemUOp>(loc, remaining, dimSizeVal);
347-
348-
/// Update remaining for the next dimension by removing what we've already
349-
/// processed. Division tells us "how many complete groups of this dimension
350-
/// we've gone through" e.g., linearId=22, dimSize=4: 22 / 4 = 5 (we've
351-
/// completed 5 groups of 4) Skip this for the last iteration since there's
352-
/// no next dimension to process
353-
if (i < order.size() - 1) {
354-
remaining =
355-
builder.createOrFold<index::DivUOp>(loc, remaining, dimSizeVal);
356-
}
357-
}
358-
return result;
303+
return affine::delinearizeIndex(builder, loc, linearId, dims);
359304
}
360305

361306
/// Implements DistributeLayoutAttr::computeDistributedCoords to generate

mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp

Lines changed: 12 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,70 +1219,6 @@ struct WgToSgMultiDimReductionOp
12191219
}
12201220
};
12211221

1222-
// This pattern transforms vector.transpose ops to work at subgroup level.
1223-
struct WgToSgVectorTransposeOp
1224-
: public OpConversionPattern<vector::TransposeOp> {
1225-
using OpConversionPattern<vector::TransposeOp>::OpConversionPattern;
1226-
1227-
LogicalResult
1228-
matchAndRewrite(vector::TransposeOp op, OneToNOpAdaptor adaptor,
1229-
ConversionPatternRewriter &rewriter) const override {
1230-
VectorType resultType = op.getResultVectorType();
1231-
1232-
ArrayRef<int64_t> wgShape = resultType.getShape();
1233-
xegpu::DistributeLayoutAttr layout =
1234-
xegpu::getDistributeLayoutAttr(op.getResult());
1235-
if (!layout || !layout.isForWorkgroup())
1236-
return failure();
1237-
1238-
xegpu::DistributeLayoutAttr sourceLayout =
1239-
xegpu::getDistributeLayoutAttr(op.getVector());
1240-
if (!sourceLayout || !sourceLayout.isForWorkgroup())
1241-
return failure();
1242-
1243-
SmallVector<int64_t> sourceSgLayout =
1244-
sourceLayout.getEffectiveSgLayoutAsInt();
1245-
SmallVector<int64_t> resultSgLayout = layout.getEffectiveSgLayoutAsInt();
1246-
DenseI32ArrayAttr sourceOrder = sourceLayout.getOrder();
1247-
DenseI32ArrayAttr resultOrder = layout.getOrder();
1248-
1249-
if (!sourceOrder || !resultOrder) {
1250-
return rewriter.notifyMatchFailure(
1251-
op, "Both source and result must have order attributes");
1252-
}
1253-
1254-
ArrayRef<int64_t> permutation = op.getPermutation();
1255-
size_t permutationSize = permutation.size();
1256-
if (sourceSgLayout.size() != permutationSize ||
1257-
resultSgLayout.size() != permutationSize) {
1258-
return rewriter.notifyMatchFailure(
1259-
op, "Layouts and permutation must have the same rank");
1260-
}
1261-
1262-
// Check that sgLayout, sgData & order are properly transposed for source
1263-
// and result
1264-
if (!layout.isTransposeOf(sourceLayout, permutation))
1265-
return rewriter.notifyMatchFailure(
1266-
op, "Result layout is not a valid transpose of source layout "
1267-
"according to permutation");
1268-
1269-
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
1270-
VectorType newResultType =
1271-
VectorType::get(sgShape, resultType.getElementType());
1272-
SmallVector<Value> newTransposeOps;
1273-
for (auto src : adaptor.getVector()) {
1274-
auto newTranspose = vector::TransposeOp::create(
1275-
rewriter, op.getLoc(), newResultType, src, permutation);
1276-
xegpu::setDistributeLayoutAttr(newTranspose->getResult(0),
1277-
layout.dropSgLayoutAndData());
1278-
newTransposeOps.push_back(newTranspose.getResult());
1279-
}
1280-
1281-
rewriter.replaceOpWithMultiple(op, {newTransposeOps});
1282-
return success();
1283-
}
1284-
};
1285-
12861222
} // namespace
12871223

12881224
namespace mlir {
@@ -1297,8 +1233,7 @@ void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns) {
12971233
WgToSgArithConstantOp, WgToSgLoadGatherOpWithOffset,
12981234
WgToSgStoreScatterOpWithOffset, WgToSgLoadMatrixOp,
12991235
WgToSgStoreMatrixOp, WgToSgVectorStepOp, WgToSgVectorShapeCastOp,
1300-
WgToSgMultiDimReductionOp, WgToSgVectorTransposeOp>(
1301-
patterns.getContext());
1236+
WgToSgMultiDimReductionOp>(patterns.getContext());
13021237
}
13031238
} // namespace xegpu
13041239
} // namespace mlir
@@ -1425,9 +1360,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
14251360
return isLegal(layout);
14261361
});
14271362

1428-
target.addDynamicallyLegalOp<vector::ShapeCastOp, vector::StepOp,
1429-
vector::TransposeOp, vector::BroadcastOp,
1430-
vector::MultiDimReductionOp>(
1363+
target.addDynamicallyLegalOp<vector::ShapeCastOp, vector::StepOp>(
14311364
[=](Operation *op) -> bool {
14321365
// Check for either a SliceAttr or LayoutAttr on the result.
14331366
auto layout = xegpu::getDistributeLayoutAttr(op->getResult(0));
@@ -1446,6 +1379,16 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
14461379
return isLegal(layout);
14471380
});
14481381

1382+
target.addDynamicallyLegalOp<vector::BroadcastOp>(
1383+
[=](vector::BroadcastOp op) -> bool {
1384+
return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
1385+
});
1386+
1387+
target.addDynamicallyLegalOp<vector::MultiDimReductionOp>(
1388+
[=](vector::MultiDimReductionOp op) -> bool {
1389+
return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
1390+
});
1391+
14491392
target.addDynamicallyLegalOp<xegpu::ConvertLayoutOp>(
14501393
[=](xegpu::ConvertLayoutOp op) -> bool {
14511394
return isLegal(op.getInputLayout()) && isLegal(op.getTargetLayout());

mlir/test/Dialect/XeGPU/subgroup-distribute.mlir

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -268,16 +268,15 @@ gpu.module @xevm_module{
268268

269269
// -----
270270
// CHECK-LABEL: gpu.func @load_store_matrix_1({{.*}}) {
271-
// CHECK: %[[C2:.*]] = arith.constant 2 : index
272-
// CHECK: %[[C8:.*]] = arith.constant 8 : index
271+
// CHECK: %[[LAYOUT_X:.*]] = arith.constant 8 : index
272+
// CHECK: %[[LAYOUT_Y:.*]] = arith.constant 2 : index
273273
// CHECK: %[[LANE_ID:.*]] = gpu.lane_id
274-
// CHECK: %[[REMU1:.*]] = index.remu %[[LANE_ID]], %[[C8]]
275-
// CHECK: %[[DIVU:.*]] = index.divu %[[LANE_ID]], %[[C8]]
276-
// CHECK: %[[REMU2:.*]] = index.remu %[[DIVU]], %[[C2]]
277-
// CHECK: %[[REMU3:.*]] = index.remu %[[REMU2]], %[[C2]]
278-
// CHECK: %[[REMU4:.*]] = index.remu %[[REMU1]], %[[C8]]
279-
// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[REMU3]], %[[REMU4]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<1x1xf32>
280-
// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[REMU3]], %[[REMU4]]] : vector<1x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
274+
// CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
275+
// CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
276+
// CHECK: %[[LANE_Y_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_Y]], %[[LAYOUT_Y]]
277+
// CHECK: %[[LANE_X_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_X]], %[[LAYOUT_X]]
278+
// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<1x1xf32>
279+
// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : vector<1x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
281280
gpu.module @xevm_module{
282281
gpu.func @load_store_matrix_1(%arg0: !xegpu.mem_desc<32x32xf32>) {
283282
%c0 = arith.constant 0 : index
@@ -289,20 +288,19 @@ gpu.module @xevm_module{
289288

290289
// -----
291290
// CHECK-LABEL: gpu.func @load_store_matrix_2({{.*}}) {
292-
// CHECK: %[[C8:.*]] = arith.constant 8 : index
293-
// CHECK: %[[C2:.*]] = arith.constant 2 : index
294-
// CHECK: %[[C4:.*]] = arith.constant 4 : index
295-
// CHECK: %[[C1:.*]] = arith.constant 1 : index
291+
// CHECK: %[[DIST_UNIT_HEIGHT_X:.*]] = arith.constant 4 : index
292+
// CHECK: %[[DIST_UNIT_HEIGHT_Y:.*]] = arith.constant 8 : index
293+
// CHECK: %[[LANE_DATA_Y:.*]] = arith.constant 2 : index
294+
// CHECK: %[[USER_OFFSET_X:.*]] = arith.constant 1 : index
296295
// CHECK: %[[LANE_ID:.*]] = gpu.lane_id
297-
// CHECK: %[[REMU1:.*]] = index.remu %[[LANE_ID]], %[[C4]]
298-
// CHECK: %[[DIVU:.*]] = index.divu %[[LANE_ID]], %[[C4]]
299-
// CHECK: %[[REMU2:.*]] = index.remu %[[DIVU]], %[[C4]]
300-
// CHECK: %[[MUL:.*]] = index.mul %[[REMU2]], %[[C2]]
301-
// CHECK: %[[REMU3:.*]] = index.remu %[[MUL]], %[[C8]]
302-
// CHECK: %[[REMU4:.*]] = index.remu %[[REMU1]], %[[C4]]
303-
// CHECK: %[[ADD:.*]] = index.add %[[REMU4]], %[[C1]]
304-
// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[REMU3]], %[[ADD]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<2x1xf32>
305-
// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[REMU3]], %[[ADD]]] : vector<2x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
296+
// CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
297+
// CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
298+
// CHECK: %[[LANE_Y_OFFSET_1:.*]] = index.mul %[[DELINEARIZED_LANE_Y]], %[[LANE_DATA_Y]]
299+
// CHECK: %[[LANE_Y_OFFSET:.*]] = index.remu %[[LANE_Y_OFFSET_1]], %[[DIST_UNIT_HEIGHT_Y]]
300+
// CHECK: %[[LANE_X_OFFSET_1:.*]] = index.remu %[[DELINEARIZED_LANE_X]], %[[DIST_UNIT_HEIGHT_X]]
301+
// CHECK: %[[LANE_X_OFFSET:.*]] = index.add %[[LANE_X_OFFSET_1]], %[[USER_OFFSET_X]]
302+
// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<2x1xf32>
303+
// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : vector<2x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
306304
gpu.module @xevm_module{
307305
gpu.func @load_store_matrix_2(%arg0: !xegpu.mem_desc<32x32xf32>) {
308306
%c0 = arith.constant 0 : index
Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,33 @@
11
// RUN: mlir-opt --test-xegpu-layout-interface --cse -split-input-file %s | FileCheck %s
22

3+
//CHECK: #map = affine_map<()[s0] -> (s0 floordiv 8)>
34
gpu.module @test {
45
gpu.func @slice_attr() -> vector<128xindex> {
5-
// CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
6-
// CHECK-DAG: %[[DIVU:.*]] = index.divu %[[SGID]], %[[C8:.*]]
7-
// CHECK-DAG: %[[REMU:.*]] = index.remu %[[DIVU]], %[[C4:.*]]
8-
// CHECK-DAG: %[[MUL:.*]] = index.mul %[[REMU]], %[[C32:.*]]
9-
// CHECK-DAG: %[[MOD:.*]] = index.remu %[[MUL]], %[[C128:.*]]
10-
// CHECK-DAG: %[[BASE:.*]] = vector.step : vector<32xindex>
11-
// CHECK-DAG: %[[CAST:.*]] = vector.broadcast %[[MOD]] : index to vector<32xindex>
12-
// CHECK-DAG: %[[ADD:.*]] = arith.addi %[[BASE]], %[[CAST]] : vector<32xindex>
6+
//CHECK: [[sgId:%.+]] = gpu.subgroup_id : index
7+
//CHECK: [[IDY:%.+]] = affine.apply #map()[[[sgId]]]
8+
//CHECK: [[c32:%.+]] = arith.constant 32 : index
9+
//CHECK: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]]
10+
//CHECK: [[c128:%.+]] = arith.constant 128 : index
11+
//CHECK: [[MODY:%.+]] = index.remu [[LOCALY]], [[c128]]
12+
//CHECK: [[BASE:%.+]] = vector.step : vector<32xindex>
13+
//CHECK: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex>
14+
//CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex>
1315
%step = vector.step {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32]>, dims = [1]>}: vector<128xindex>
1416
gpu.return %step : vector<128xindex>
1517
}
1618

1719
gpu.func @nested_slice_attr() -> vector<128xindex> {
18-
// CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
19-
// CHECK-DAG: %[[DIVU1:.*]] = index.divu %[[SGID]], %[[C1:.*]]
20-
// CHECK-DAG: %[[DIVU2:.*]] = index.divu %[[DIVU1]], %[[C8:.*]]
21-
// CHECK-DAG: %[[REMU:.*]] = index.remu %[[DIVU2]], %[[C4:.*]]
22-
// CHECK-DAG: %[[MUL:.*]] = index.mul %[[REMU]], %[[C32:.*]]
23-
// CHECK-DAG: %[[MOD:.*]] = index.remu %[[MUL]], %[[C128:.*]]
24-
// CHECK-DAG: %[[BASE:.*]] = vector.step : vector<32xindex>
25-
// CHECK-DAG: %[[CAST:.*]] = vector.broadcast %[[MOD]] : index to vector<32xindex>
26-
// CHECK-DAG: %[[ADD:.*]] = arith.addi %[[BASE]], %[[CAST]] : vector<32xindex>
20+
//CHECK: [[sgId:%.+]] = gpu.subgroup_id : index
21+
//CHECK: [[IDY:%.+]] = affine.apply #map()[[[sgId]]]
22+
//CHECK: [[c32:%.+]] = arith.constant 32 : index
23+
//CHECK: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]]
24+
//CHECK: [[c128:%.+]] = arith.constant 128 : index
25+
//CHECK: [[MODY:%.+]] = index.remu [[LOCALY]], [[c128]]
26+
//CHECK: [[BASE:%.+]] = vector.step : vector<32xindex>
27+
//CHECK: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex>
28+
//CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex>
2729
%0 = vector.step {layout_result_0 = #xegpu.slice<#xegpu.slice<#xegpu.layout<sg_layout = [4, 8, 1], sg_data = [32, 32, 1]>, dims = [2]>, dims = [1]>} : vector<128xindex>
2830
gpu.return %0 : vector<128xindex>
2931
}
3032

31-
}
32-
33+
}

mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-elemwise.mlir

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,14 @@ gpu.module @test_elementwise_ops {
166166
%load_b = xegpu.load_nd %tdesc_b
167167
: !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>>
168168
-> vector<24x32xf32>
169-
// CHECK-COUNT-12: arith.negf {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>} : vector<2x2xf32>
169+
// CHECK-COUNT-12: arith.negf {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>}
170+
// CHECK-SAME-COUNT-12: : vector<2x2xf32>
170171
// CHECK-NOT: arith.negf
171172
%negf = arith.negf %load_a
172173
{layout_result_0 = #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>}
173174
: vector<24x32xf32>
174-
// CHECK-COUNT-12: math.powf {{.*}}, {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>} : vector<2x2xf32>
175+
// CHECK-COUNT-12: math.powf {{.*}}, {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>}
176+
// CHECK-SAME-COUNT-12: : vector<2x2xf32>
175177
// CHECK-NOT: math.powf
176178
%powf = math.powf %load_a, %load_b
177179
{layout_result_0 = #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>}

0 commit comments

Comments
 (0)