Skip to content

Commit 72d6625

Browse files
osharabi authored and ogabbay committed
habanalabs: modify multi-CS to wait on stream masters
During the integration, the multi-CS requirements were refined:
- The multi-CS call shall wait on "per-ASIC" predefined stream masters instead of a set of streams.
- Stream masters are a set of QIDs used by the upper SW layers (synapse) for completion (each must be an external/HW queue).
Signed-off-by: Ohad Sharabi <[email protected]>
Reviewed-by: Oded Gabbay <[email protected]>
Signed-off-by: Oded Gabbay <[email protected]>
1 parent 1f6bdee commit 72d6625

File tree

6 files changed

+77
-28
lines changed

6 files changed

+77
-28
lines changed

drivers/misc/habanalabs/common/command_submission.c

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -487,14 +487,15 @@ static void force_complete_multi_cs(struct hl_device *hdev)
487487
*
488488
* @hdev: pointer to habanalabs device structure
489489
* @cs: CS structure
490-
*
491-
* The function signals waiting entity that its waiting stream has common
492-
* stream with the completed CS.
490+
* The function signals a waiting entity that has overlapping stream masters
491+
* with the completed CS.
493492
* For example:
494-
* - a completed CS worked on streams 0 and 1, multi CS completion
495-
* is actively waiting on stream 3. don't send signal as no common stream
496-
* - a completed CS worked on streams 0 and 1, multi CS completion
497-
* is actively waiting on streams 1 and 3. send signal as stream 1 is common
493+
* - a completed CS worked on stream master QID 4, multi CS completion
494+
* is actively waiting on stream master QIDs 3, 5. don't send signal as no
495+
* common stream master QID
496+
* - a completed CS worked on stream master QID 4, multi CS completion
497+
* is actively waiting on stream master QIDs 3, 4. send signal as stream
498+
* master QID 4 is common
498499
*/
499500
static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
500501
{
@@ -518,10 +519,11 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
518519
* complete if:
519520
* 1. still waiting for completion
520521
* 2. the completed CS has at least one overlapping stream
521-
* with the streams in the completion
522+
* master with the stream masters in the completion
522523
*/
523524
if (mcs_compl->used &&
524-
(fence->stream_map & mcs_compl->stream_map)) {
525+
(fence->stream_master_qid_map &
526+
mcs_compl->stream_master_qid_map)) {
525527
/* extract the timestamp only of first completed CS */
526528
if (!mcs_compl->timestamp)
527529
mcs_compl->timestamp =
@@ -1228,6 +1230,17 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
12281230
return 0;
12291231
}
12301232

1233+
static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1234+
{
1235+
int i;
1236+
1237+
for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1238+
if (qid == hdev->stream_master_qid_arr[i])
1239+
return BIT(i);
1240+
1241+
return 0;
1242+
}
1243+
12311244
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
12321245
u32 num_chunks, u64 *cs_seq, u32 flags,
12331246
u32 encaps_signals_handle, u32 timeout)
@@ -1241,7 +1254,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
12411254
struct hl_cs *cs;
12421255
struct hl_cb *cb;
12431256
u64 user_sequence;
1244-
u8 stream_map = 0;
1257+
u8 stream_master_qid_map = 0;
12451258
int rc, i;
12461259

12471260
cntr = &hdev->aggregated_cs_counters;
@@ -1310,7 +1323,9 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
13101323
* queues of this CS
13111324
*/
13121325
if (hdev->supports_wait_for_multi_cs)
1313-
stream_map |= BIT((chunk->queue_index % 4));
1326+
stream_master_qid_map |=
1327+
get_stream_master_qid_mask(hdev,
1328+
chunk->queue_index);
13141329
}
13151330

13161331
job = hl_cs_allocate_job(hdev, queue_type,
@@ -1378,7 +1393,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
13781393
* fence object for multi-CS completion
13791394
*/
13801395
if (hdev->supports_wait_for_multi_cs)
1381-
cs->fence->stream_map = stream_map;
1396+
cs->fence->stream_master_qid_map = stream_master_qid_map;
13821397

13831398
rc = hl_hw_queue_schedule_cs(cs);
13841399
if (rc) {
@@ -2332,7 +2347,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
23322347
break;
23332348
}
23342349

2335-
mcs_data->stream_map |= fence->stream_map;
2350+
mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
23362351

23372352
if (status == CS_WAIT_STATUS_BUSY)
23382353
continue;
@@ -2394,7 +2409,8 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
23942409
* hl_wait_multi_cs_completion_init - init completion structure
23952410
*
23962411
* @hdev: pointer to habanalabs device structure
2397-
* @stream_map: stream map, set bit indicates stream to wait on
2412+
* @stream_master_bitmap: stream master QIDs map, set bit indicates stream
2413+
* master QID to wait on
23982414
*
23992415
* @return valid completion struct pointer on success, otherwise error pointer
24002416
*
@@ -2404,7 +2420,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
24042420
*/
24052421
static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
24062422
struct hl_device *hdev,
2407-
u8 stream_map)
2423+
u8 stream_master_bitmap)
24082424
{
24092425
struct multi_cs_completion *mcs_compl;
24102426
int i;
@@ -2416,7 +2432,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
24162432
if (!mcs_compl->used) {
24172433
mcs_compl->used = 1;
24182434
mcs_compl->timestamp = 0;
2419-
mcs_compl->stream_map = stream_map;
2435+
mcs_compl->stream_master_qid_map = stream_master_bitmap;
24202436
reinit_completion(&mcs_compl->completion);
24212437
spin_unlock(&mcs_compl->lock);
24222438
break;
@@ -2464,7 +2480,7 @@ static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
24642480
long completion_rc;
24652481

24662482
mcs_compl = hl_wait_multi_cs_completion_init(hdev,
2467-
mcs_data->stream_map);
2483+
mcs_data->stream_master_qid_map);
24682484
if (IS_ERR(mcs_compl))
24692485
return PTR_ERR(mcs_compl);
24702486

drivers/misc/habanalabs/common/habanalabs.h

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -592,18 +592,18 @@ struct asic_fixed_properties {
592592
* @completion: fence is implemented using completion
593593
* @refcount: refcount for this fence
594594
* @cs_sequence: sequence of the corresponding command submission
595+
* @stream_master_qid_map: stream masters QID bitmap to represent all stream
596+
* masters QIDs that multi-CS is waiting on
595597
* @error: mark this fence with error
596598
* @timestamp: timestamp upon completion
597-
* @stream_map: streams bitmap to represent all streams that multi cs is
598-
* waiting on
599599
*/
600600
struct hl_fence {
601601
struct completion completion;
602602
struct kref refcount;
603603
u64 cs_sequence;
604+
u32 stream_master_qid_map;
604605
int error;
605606
ktime_t timestamp;
606-
u8 stream_map;
607607
};
608608

609609
/**
@@ -1160,6 +1160,7 @@ struct fw_load_mgr {
11601160
* @state_dump_init: initialize constants required for state dump
11611161
* @get_sob_addr: get SOB base address offset.
11621162
* @set_pci_memory_regions: setting properties of PCI memory regions
1163+
* @get_stream_master_qid_arr: get pointer to stream masters QID array
11631164
*/
11641165
struct hl_asic_funcs {
11651166
int (*early_init)(struct hl_device *hdev);
@@ -1289,6 +1290,7 @@ struct hl_asic_funcs {
12891290
void (*state_dump_init)(struct hl_device *hdev);
12901291
u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
12911292
void (*set_pci_memory_regions)(struct hl_device *hdev);
1293+
u32* (*get_stream_master_qid_arr)(void);
12921294
};
12931295

12941296

@@ -2263,16 +2265,16 @@ struct hl_mmu_funcs {
22632265
* @completion: completion of any of the CS in the list
22642266
* @lock: spinlock for the completion structure
22652267
* @timestamp: timestamp for the multi-CS completion
2268+
* @stream_master_qid_map: bitmap of all stream masters on which the multi-CS
2269+
* is waiting
22662270
* @used: 1 if in use, otherwise 0
2267-
* @stream_map: bitmap of all HW/external queues streams on which the multi-CS
2268-
* is waiting
22692271
*/
22702272
struct multi_cs_completion {
22712273
struct completion completion;
22722274
spinlock_t lock;
22732275
s64 timestamp;
2276+
u32 stream_master_qid_map;
22742277
u8 used;
2275-
u8 stream_map;
22762278
};
22772279

22782280
/**
@@ -2284,9 +2286,9 @@ struct multi_cs_completion {
22842286
* @timestamp: timestamp of first completed CS
22852287
* @wait_status: wait for CS status
22862288
* @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
2289+
* @stream_master_qid_map: bitmap of all stream master QIDs on which the
2290+
* multi-CS is waiting
22872291
* @arr_len: fence_arr and seq_arr array length
2288-
* @stream_map: bitmap of all HW/external queues streams on which the multi-CS
2289-
* is waiting
22902292
* @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
22912293
* @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
22922294
*/
@@ -2298,8 +2300,8 @@ struct multi_cs_data {
22982300
s64 timestamp;
22992301
long wait_status;
23002302
u32 completion_bitmap;
2303+
u32 stream_master_qid_map;
23012304
u8 arr_len;
2302-
u8 stream_map;
23032305
u8 gone_cs;
23042306
u8 update_ts;
23052307
};
@@ -2520,6 +2522,7 @@ struct hl_device {
25202522

25212523
struct multi_cs_completion multi_cs_completion[
25222524
MULTI_CS_MAX_USER_CTX];
2525+
u32 *stream_master_qid_arr;
25232526
atomic64_t dram_used_mem;
25242527
u64 timeout_jiffies;
25252528
u64 max_power;
@@ -2570,6 +2573,7 @@ struct hl_device {
25702573
u8 skip_reset_on_timeout;
25712574
u8 device_cpu_is_halted;
25722575
u8 supports_wait_for_multi_cs;
2576+
u8 stream_master_qid_arr_size;
25732577

25742578
/* Parameters for bring-up */
25752579
u64 nic_ports_mask;

drivers/misc/habanalabs/common/hw_queue.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
721721

722722
/* update stream master QID map of the first CS */
723723
if (hdev->supports_wait_for_multi_cs)
724-
staged_cs->fence->stream_map |= cs->fence->stream_map;
724+
staged_cs->fence->stream_master_qid_map |=
725+
cs->fence->stream_master_qid_map;
725726
}
726727

727728
list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);

drivers/misc/habanalabs/gaudi/gaudi.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,17 @@
110110

111111
#define MONITOR_SOB_STRING_SIZE 256
112112

113+
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114+
GAUDI_QUEUE_ID_DMA_0_0,
115+
GAUDI_QUEUE_ID_DMA_0_1,
116+
GAUDI_QUEUE_ID_DMA_0_2,
117+
GAUDI_QUEUE_ID_DMA_0_3,
118+
GAUDI_QUEUE_ID_DMA_1_0,
119+
GAUDI_QUEUE_ID_DMA_1_1,
120+
GAUDI_QUEUE_ID_DMA_1_2,
121+
GAUDI_QUEUE_ID_DMA_1_3
122+
};
123+
113124
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
114125
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
115126
"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -1870,6 +1881,9 @@ static int gaudi_sw_init(struct hl_device *hdev)
18701881
hdev->supports_wait_for_multi_cs = true;
18711882

18721883
hdev->asic_funcs->set_pci_memory_regions(hdev);
1884+
hdev->stream_master_qid_arr =
1885+
hdev->asic_funcs->get_stream_master_qid_arr();
1886+
hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
18731887

18741888
return 0;
18751889

@@ -9352,6 +9366,11 @@ static void gaudi_state_dump_init(struct hl_device *hdev)
93529366
sds->funcs = gaudi_state_dump_funcs;
93539367
}
93549368

9369+
static u32 *gaudi_get_stream_master_qid_arr(void)
9370+
{
9371+
return gaudi_stream_master;
9372+
}
9373+
93559374
static const struct hl_asic_funcs gaudi_funcs = {
93569375
.early_init = gaudi_early_init,
93579376
.early_fini = gaudi_early_fini,
@@ -9440,7 +9459,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
94409459
.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
94419460
.state_dump_init = gaudi_state_dump_init,
94429461
.get_sob_addr = gaudi_get_sob_addr,
9443-
.set_pci_memory_regions = gaudi_set_pci_memory_regions
9462+
.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9463+
.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
94449464
};
94459465

94469466
/**

drivers/misc/habanalabs/gaudi/gaudiP.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + \
3737
NUMBER_OF_CPU_HW_QUEUES)
3838

39+
#define GAUDI_STREAM_MASTER_ARR_SIZE 8
40+
3941
#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
4042
#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
4143
#endif

drivers/misc/habanalabs/goya/goya.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5588,6 +5588,11 @@ static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
55885588
return 0;
55895589
}
55905590

5591+
static u32 *goya_get_stream_master_qid_arr(void)
5592+
{
5593+
return NULL;
5594+
}
5595+
55915596
static const struct hl_asic_funcs goya_funcs = {
55925597
.early_init = goya_early_init,
55935598
.early_fini = goya_early_fini,
@@ -5677,6 +5682,7 @@ static const struct hl_asic_funcs goya_funcs = {
56775682
.state_dump_init = goya_state_dump_init,
56785683
.get_sob_addr = &goya_get_sob_addr,
56795684
.set_pci_memory_regions = goya_set_pci_memory_regions,
5685+
.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
56805686
};
56815687

56825688
/*

0 commit comments

Comments
 (0)