Skip to content

Commit b78cda7

Browse files
committed
Merge tag 'misc-habanalabs-next-2019-11-21' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes:

This tag contains the following changes for kernel 5.5:

- MMU code improvements that includes:
  - Distinguish between "normal" unmapping and unmapping that is done as part of the tear-down of a user process. This improves performance of unmapping during reset of the device.
  - Add future ASIC support in generic MMU code.
- Improve device reset code by adding more protection around accessing the device during the reset process.
- Add new H/W queue type for future ASIC support
- Add more information to be retrieved by users through INFO IOCTL:
  - clock rate
  - board name
  - reset counters
- Small bug fixes and minor improvements to code.

* tag 'misc-habanalabs-next-2019-11-21' of git://people.freedesktop.org/~gabbayo/linux: (31 commits)
  habanalabs: add more protection of device during reset
  habanalabs: flush EQ workers in hard reset
  habanalabs: make the reset code more consistent
  habanalabs: expose reset counters via existing INFO IOCTL
  habanalabs: make code more concise
  habanalabs: use defines for F/W files
  habanalabs: remove prints on successful device initialization
  habanalabs: remove unnecessary checks
  habanalabs: invalidate MMU cache only once
  habanalabs: skip VA block list update in reset flow
  habanalabs: optimize MMU unmap
  habanalabs: prevent read/write from/to the device during hard reset
  habanalabs: split MMU properties to PCI/DRAM
  habanalabs: re-factor MMU masks and documentation
  habanalabs: type specific MMU cache invalidation
  habanalabs: re-factor memory module code
  habanalabs: export uapi defines to user-space
  habanalabs: don't print error when queues are full
  habanalabs: increase max jobs number to 512
  habanalabs: set ETR as non-secured
  ...
2 parents 599ea01 + 5feccdd commit b78cda7

File tree

20 files changed

+1159
-542
lines changed

20 files changed

+1159
-542
lines changed

drivers/misc/habanalabs/command_submission.c

Lines changed: 87 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs)
6565
kref_put(&cs->refcount, cs_do_release);
6666
}
6767

68+
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
69+
{
70+
/*
71+
* Patched CB is created for external queues jobs, and for H/W queues
72+
* jobs if the user CB was allocated by driver and MMU is disabled.
73+
*/
74+
return (job->queue_type == QUEUE_TYPE_EXT ||
75+
(job->queue_type == QUEUE_TYPE_HW &&
76+
job->is_kernel_allocated_cb &&
77+
!hdev->mmu_enable));
78+
}
79+
6880
/*
6981
* cs_parser - parse the user command submission
7082
*
@@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
91103
parser.patched_cb = NULL;
92104
parser.user_cb = job->user_cb;
93105
parser.user_cb_size = job->user_cb_size;
94-
parser.ext_queue = job->ext_queue;
106+
parser.queue_type = job->queue_type;
107+
parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
95108
job->patched_cb = NULL;
96109

97110
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
98-
if (job->ext_queue) {
111+
112+
if (is_cb_patched(hdev, job)) {
99113
if (!rc) {
100114
job->patched_cb = parser.patched_cb;
101115
job->job_cb_size = parser.patched_cb_size;
@@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
124138
{
125139
struct hl_cs *cs = job->cs;
126140

127-
if (job->ext_queue) {
141+
if (is_cb_patched(hdev, job)) {
128142
hl_userptr_delete_list(hdev, &job->userptr_list);
129143

130144
/*
@@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
140154
}
141155
}
142156

157+
/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
158+
* enabled, the user CB isn't released in cs_parser() and thus should be
159+
* released here.
160+
*/
161+
if (job->queue_type == QUEUE_TYPE_HW &&
162+
job->is_kernel_allocated_cb && hdev->mmu_enable) {
163+
spin_lock(&job->user_cb->lock);
164+
job->user_cb->cs_cnt--;
165+
spin_unlock(&job->user_cb->lock);
166+
167+
hl_cb_put(job->user_cb);
168+
}
169+
143170
/*
144171
* This is the only place where there can be multiple threads
145172
* modifying the list at the same time
@@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
150177

151178
hl_debugfs_remove_job(hdev, job);
152179

153-
if (job->ext_queue)
180+
if (job->queue_type == QUEUE_TYPE_EXT ||
181+
job->queue_type == QUEUE_TYPE_HW)
154182
cs_put(cs);
155183

156184
kfree(job);
@@ -387,39 +415,43 @@ static void job_wq_completion(struct work_struct *work)
387415
free_job(hdev, job);
388416
}
389417

390-
static struct hl_cb *validate_queue_index(struct hl_device *hdev,
391-
struct hl_cb_mgr *cb_mgr,
392-
struct hl_cs_chunk *chunk,
393-
bool *ext_queue)
418+
static int validate_queue_index(struct hl_device *hdev,
419+
struct hl_cs_chunk *chunk,
420+
enum hl_queue_type *queue_type,
421+
bool *is_kernel_allocated_cb)
394422
{
395423
struct asic_fixed_properties *asic = &hdev->asic_prop;
396424
struct hw_queue_properties *hw_queue_prop;
397-
u32 cb_handle;
398-
struct hl_cb *cb;
399-
400-
/* Assume external queue */
401-
*ext_queue = true;
402425

403426
hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
404427

405428
if ((chunk->queue_index >= HL_MAX_QUEUES) ||
406429
(hw_queue_prop->type == QUEUE_TYPE_NA)) {
407430
dev_err(hdev->dev, "Queue index %d is invalid\n",
408431
chunk->queue_index);
409-
return NULL;
432+
return -EINVAL;
410433
}
411434

412435
if (hw_queue_prop->driver_only) {
413436
dev_err(hdev->dev,
414437
"Queue index %d is restricted for the kernel driver\n",
415438
chunk->queue_index);
416-
return NULL;
417-
} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
418-
*ext_queue = false;
419-
return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
439+
return -EINVAL;
420440
}
421441

422-
/* Retrieve CB object */
442+
*queue_type = hw_queue_prop->type;
443+
*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
444+
445+
return 0;
446+
}
447+
448+
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
449+
struct hl_cb_mgr *cb_mgr,
450+
struct hl_cs_chunk *chunk)
451+
{
452+
struct hl_cb *cb;
453+
u32 cb_handle;
454+
423455
cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
424456

425457
cb = hl_cb_get(hdev, cb_mgr, cb_handle);
@@ -444,20 +476,23 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
444476
return NULL;
445477
}
446478

447-
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
479+
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
480+
enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
448481
{
449482
struct hl_cs_job *job;
450483

451484
job = kzalloc(sizeof(*job), GFP_ATOMIC);
452485
if (!job)
453486
return NULL;
454487

455-
job->ext_queue = ext_queue;
488+
job->queue_type = queue_type;
489+
job->is_kernel_allocated_cb = is_kernel_allocated_cb;
456490

457-
if (job->ext_queue) {
491+
if (is_cb_patched(hdev, job))
458492
INIT_LIST_HEAD(&job->userptr_list);
493+
494+
if (job->queue_type == QUEUE_TYPE_EXT)
459495
INIT_WORK(&job->finish_work, job_wq_completion);
460-
}
461496

462497
return job;
463498
}
@@ -470,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
470505
struct hl_cs_job *job;
471506
struct hl_cs *cs;
472507
struct hl_cb *cb;
473-
bool ext_queue_present = false;
508+
bool int_queues_only = true;
474509
u32 size_to_copy;
475510
int rc, i, parse_cnt;
476511

@@ -514,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
514549
/* Validate ALL the CS chunks before submitting the CS */
515550
for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
516551
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
517-
bool ext_queue;
552+
enum hl_queue_type queue_type;
553+
bool is_kernel_allocated_cb;
554+
555+
rc = validate_queue_index(hdev, chunk, &queue_type,
556+
&is_kernel_allocated_cb);
557+
if (rc)
558+
goto free_cs_object;
518559

519-
cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
520-
&ext_queue);
521-
if (ext_queue) {
522-
ext_queue_present = true;
560+
if (is_kernel_allocated_cb) {
561+
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
523562
if (!cb) {
524563
rc = -EINVAL;
525564
goto free_cs_object;
526565
}
566+
} else {
567+
cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
527568
}
528569

529-
job = hl_cs_allocate_job(hdev, ext_queue);
570+
if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
571+
int_queues_only = false;
572+
573+
job = hl_cs_allocate_job(hdev, queue_type,
574+
is_kernel_allocated_cb);
530575
if (!job) {
531576
dev_err(hdev->dev, "Failed to allocate a new job\n");
532577
rc = -ENOMEM;
533-
if (ext_queue)
578+
if (is_kernel_allocated_cb)
534579
goto release_cb;
535580
else
536581
goto free_cs_object;
@@ -540,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
540585
job->cs = cs;
541586
job->user_cb = cb;
542587
job->user_cb_size = chunk->cb_size;
543-
if (job->ext_queue)
588+
if (is_kernel_allocated_cb)
544589
job->job_cb_size = cb->size;
545590
else
546591
job->job_cb_size = chunk->cb_size;
@@ -553,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
553598
/*
554599
* Increment CS reference. When CS reference is 0, CS is
555600
* done and can be signaled to user and free all its resources
556-
* Only increment for JOB on external queues, because only
557-
* for those JOBs we get completion
601+
* Only increment for JOB on external or H/W queues, because
602+
* only for those JOBs we get completion
558603
*/
559-
if (job->ext_queue)
604+
if (job->queue_type == QUEUE_TYPE_EXT ||
605+
job->queue_type == QUEUE_TYPE_HW)
560606
cs_get(cs);
561607

562608
hl_debugfs_add_job(hdev, job);
@@ -570,19 +616,20 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
570616
}
571617
}
572618

573-
if (!ext_queue_present) {
619+
if (int_queues_only) {
574620
dev_err(hdev->dev,
575-
"Reject CS %d.%llu because no external queues jobs\n",
621+
"Reject CS %d.%llu because only internal queues jobs are present\n",
576622
cs->ctx->asid, cs->sequence);
577623
rc = -EINVAL;
578624
goto free_cs_object;
579625
}
580626

581627
rc = hl_hw_queue_schedule_cs(cs);
582628
if (rc) {
583-
dev_err(hdev->dev,
584-
"Failed to submit CS %d.%llu to H/W queues, error %d\n",
585-
cs->ctx->asid, cs->sequence, rc);
629+
if (rc != -EAGAIN)
630+
dev_err(hdev->dev,
631+
"Failed to submit CS %d.%llu to H/W queues, error %d\n",
632+
cs->ctx->asid, cs->sequence, rc);
586633
goto free_cs_object;
587634
}
588635

0 commit comments

Comments
 (0)