Skip to content

Commit 0acfbe9

Browse files
committed
Merge tag 'misc-habanalabs-fixes-2020-12-30' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus
Oded writes:

This tag contains the following fixes for 5.11-rc2:

- Fixes that are needed for supporting the new F/W with security features:
  - Correctly fetch PLL information in GOYA when security is enabled in F/W
  - Fix hard-reset support when F/W is in its preboot stage
  - Disable clock gating when initializing the H/W
  - Fix hard-reset procedure
  - Fix PCI controller initialization
- Remove setting of Engine-Barrier in collective wait operations. This barrier created a drop in performance
- Retry loading the TPC firmware in case of EINTR during loading
- Fix CS counters
- Register to PCI shutdown callback to fix handling of VM shutdown
- Fix order of status check
- Fix memory leak in reset procedure
- Fix and add comments and fix indentations

* tag 'misc-habanalabs-fixes-2020-12-30' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
  habanalabs: Fix memleak in hl_device_reset
  habanalabs: fix order of status check
  habanalabs: register to pci shutdown callback
  habanalabs: add validation cs counter, fix misplaced counters
  habanalabs/gaudi: retry loading TPC f/w on -EINTR
  habanalabs: adjust pci controller init to new firmware
  habanalabs: update comment in hl_boot_if.h
  habanalabs/gaudi: enhance reset message
  habanalabs: full FW hard reset support
  habanalabs/gaudi: disable CGM at HW initialization
  habanalabs: Revise comment to align with mirror list name
  habanalabs/gaudi: do not set EB in collective slave queues
  habanalabs: preboot hard reset support
  habanalabs: remove generic gaudi get_pll_freq function
  habanalabs: fetch PSOC PLL frequency from F/W in goya
  habanalabs: add comment for pll frequency ioctl opcode
  habanalabs: Fix a missing-braces warning
2 parents 5c8fe58 + b000700 commit 0acfbe9

File tree

14 files changed

+281
-199
lines changed

14 files changed

+281
-199
lines changed

drivers/misc/habanalabs/common/command_submission.c

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
472472
cntr = &hdev->aggregated_cs_counters;
473473

474474
cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
475-
if (!cs)
475+
if (!cs) {
476+
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
477+
atomic64_inc(&cntr->out_of_mem_drop_cnt);
476478
return -ENOMEM;
479+
}
477480

478481
cs->ctx = ctx;
479482
cs->submitted = false;
@@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
486489

487490
cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
488491
if (!cs_cmpl) {
492+
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
493+
atomic64_inc(&cntr->out_of_mem_drop_cnt);
489494
rc = -ENOMEM;
490495
goto free_cs;
491496
}
@@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
513518
cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
514519
sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
515520
if (!cs->jobs_in_queue_cnt) {
521+
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
522+
atomic64_inc(&cntr->out_of_mem_drop_cnt);
516523
rc = -ENOMEM;
517524
goto free_fence;
518525
}
@@ -562,7 +569,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
562569
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
563570
flush_workqueue(hdev->cq_wq[i]);
564571

565-
/* Make sure we don't have leftovers in the H/W queues mirror list */
572+
/* Make sure we don't have leftovers in the CS mirror list */
566573
list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
567574
cs_get(cs);
568575
cs->aborted = true;
@@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
764771

765772
static int hl_cs_copy_chunk_array(struct hl_device *hdev,
766773
struct hl_cs_chunk **cs_chunk_array,
767-
void __user *chunks, u32 num_chunks)
774+
void __user *chunks, u32 num_chunks,
775+
struct hl_ctx *ctx)
768776
{
769777
u32 size_to_copy;
770778

771779
if (num_chunks > HL_MAX_JOBS_PER_CS) {
780+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
781+
atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
772782
dev_err(hdev->dev,
773783
"Number of chunks can NOT be larger than %d\n",
774784
HL_MAX_JOBS_PER_CS);
@@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
777787

778788
*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
779789
GFP_ATOMIC);
780-
if (!*cs_chunk_array)
790+
if (!*cs_chunk_array) {
791+
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
792+
atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
781793
return -ENOMEM;
794+
}
782795

783796
size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
784797
if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
798+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
799+
atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
785800
dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
786801
kfree(*cs_chunk_array);
787802
return -EFAULT;
@@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
797812
struct hl_device *hdev = hpriv->hdev;
798813
struct hl_cs_chunk *cs_chunk_array;
799814
struct hl_cs_counters_atomic *cntr;
815+
struct hl_ctx *ctx = hpriv->ctx;
800816
struct hl_cs_job *job;
801817
struct hl_cs *cs;
802818
struct hl_cb *cb;
@@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
805821
cntr = &hdev->aggregated_cs_counters;
806822
*cs_seq = ULLONG_MAX;
807823

808-
rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
824+
rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
825+
hpriv->ctx);
809826
if (rc)
810827
goto out;
811828

@@ -832,17 +849,17 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
832849
rc = validate_queue_index(hdev, chunk, &queue_type,
833850
&is_kernel_allocated_cb);
834851
if (rc) {
835-
atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
836-
atomic64_inc(&cntr->parsing_drop_cnt);
852+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
853+
atomic64_inc(&cntr->validation_drop_cnt);
837854
goto free_cs_object;
838855
}
839856

840857
if (is_kernel_allocated_cb) {
841858
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
842859
if (!cb) {
843860
atomic64_inc(
844-
&hpriv->ctx->cs_counters.parsing_drop_cnt);
845-
atomic64_inc(&cntr->parsing_drop_cnt);
861+
&ctx->cs_counters.validation_drop_cnt);
862+
atomic64_inc(&cntr->validation_drop_cnt);
846863
rc = -EINVAL;
847864
goto free_cs_object;
848865
}
@@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
856873
job = hl_cs_allocate_job(hdev, queue_type,
857874
is_kernel_allocated_cb);
858875
if (!job) {
859-
atomic64_inc(
860-
&hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
876+
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
861877
atomic64_inc(&cntr->out_of_mem_drop_cnt);
862878
dev_err(hdev->dev, "Failed to allocate a new job\n");
863879
rc = -ENOMEM;
@@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
891907

892908
rc = cs_parser(hpriv, job);
893909
if (rc) {
894-
atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
910+
atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
895911
atomic64_inc(&cntr->parsing_drop_cnt);
896912
dev_err(hdev->dev,
897913
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
901917
}
902918

903919
if (int_queues_only) {
904-
atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
905-
atomic64_inc(&cntr->parsing_drop_cnt);
920+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
921+
atomic64_inc(&cntr->validation_drop_cnt);
906922
dev_err(hdev->dev,
907923
"Reject CS %d.%llu because only internal queues jobs are present\n",
908924
cs->ctx->asid, cs->sequence);
@@ -1042,7 +1058,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
10421058
}
10431059

10441060
static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1045-
struct hl_cs_chunk *chunk, u64 *signal_seq)
1061+
struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
10461062
{
10471063
u64 *signal_seq_arr = NULL;
10481064
u32 size_to_copy, signal_seq_arr_len;
@@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
10521068

10531069
/* currently only one signal seq is supported */
10541070
if (signal_seq_arr_len != 1) {
1071+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1072+
atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
10551073
dev_err(hdev->dev,
10561074
"Wait for signal CS supports only one signal CS seq\n");
10571075
return -EINVAL;
@@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
10601078
signal_seq_arr = kmalloc_array(signal_seq_arr_len,
10611079
sizeof(*signal_seq_arr),
10621080
GFP_ATOMIC);
1063-
if (!signal_seq_arr)
1081+
if (!signal_seq_arr) {
1082+
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1083+
atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
10641084
return -ENOMEM;
1085+
}
10651086

10661087
size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
10671088
if (copy_from_user(signal_seq_arr,
10681089
u64_to_user_ptr(chunk->signal_seq_arr),
10691090
size_to_copy)) {
1091+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1092+
atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
10701093
dev_err(hdev->dev,
10711094
"Failed to copy signal seq array from user\n");
10721095
rc = -EFAULT;
@@ -1153,23 +1176,28 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
11531176
struct hl_device *hdev = hpriv->hdev;
11541177
struct hl_cs_compl *sig_waitcs_cmpl;
11551178
u32 q_idx, collective_engine_id = 0;
1179+
struct hl_cs_counters_atomic *cntr;
11561180
struct hl_fence *sig_fence = NULL;
11571181
struct hl_ctx *ctx = hpriv->ctx;
11581182
enum hl_queue_type q_type;
11591183
struct hl_cs *cs;
11601184
u64 signal_seq;
11611185
int rc;
11621186

1187+
cntr = &hdev->aggregated_cs_counters;
11631188
*cs_seq = ULLONG_MAX;
11641189

1165-
rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
1190+
rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1191+
ctx);
11661192
if (rc)
11671193
goto out;
11681194

11691195
/* currently it is guaranteed to have only one chunk */
11701196
chunk = &cs_chunk_array[0];
11711197

11721198
if (chunk->queue_index >= hdev->asic_prop.max_queues) {
1199+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1200+
atomic64_inc(&cntr->validation_drop_cnt);
11731201
dev_err(hdev->dev, "Queue index %d is invalid\n",
11741202
chunk->queue_index);
11751203
rc = -EINVAL;
@@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
11811209
q_type = hw_queue_prop->type;
11821210

11831211
if (!hw_queue_prop->supports_sync_stream) {
1212+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1213+
atomic64_inc(&cntr->validation_drop_cnt);
11841214
dev_err(hdev->dev,
11851215
"Queue index %d does not support sync stream operations\n",
11861216
q_idx);
@@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
11901220

11911221
if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
11921222
if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1223+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1224+
atomic64_inc(&cntr->validation_drop_cnt);
11931225
dev_err(hdev->dev,
11941226
"Queue index %d is invalid\n", q_idx);
11951227
rc = -EINVAL;
@@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
12001232
}
12011233

12021234
if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
1203-
rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
1235+
rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
12041236
if (rc)
12051237
goto free_cs_chunk_array;
12061238

12071239
sig_fence = hl_ctx_get_fence(ctx, signal_seq);
12081240
if (IS_ERR(sig_fence)) {
1241+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1242+
atomic64_inc(&cntr->validation_drop_cnt);
12091243
dev_err(hdev->dev,
12101244
"Failed to get signal CS with seq 0x%llx\n",
12111245
signal_seq);
@@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
12231257
container_of(sig_fence, struct hl_cs_compl, base_fence);
12241258

12251259
if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
1260+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1261+
atomic64_inc(&cntr->validation_drop_cnt);
12261262
dev_err(hdev->dev,
12271263
"CS seq 0x%llx is not of a signal CS\n",
12281264
signal_seq);
@@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
12701306
else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
12711307
rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
12721308
cs, q_idx, collective_engine_id);
1273-
else
1309+
else {
1310+
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1311+
atomic64_inc(&cntr->validation_drop_cnt);
12741312
rc = -EINVAL;
1313+
}
12751314

12761315
if (rc)
12771316
goto free_cs_object;

drivers/misc/habanalabs/common/device.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
1717
{
1818
enum hl_device_status status;
1919

20-
if (hdev->disabled)
21-
status = HL_DEVICE_STATUS_MALFUNCTION;
22-
else if (atomic_read(&hdev->in_reset))
20+
if (atomic_read(&hdev->in_reset))
2321
status = HL_DEVICE_STATUS_IN_RESET;
2422
else if (hdev->needs_reset)
2523
status = HL_DEVICE_STATUS_NEEDS_RESET;
24+
else if (hdev->disabled)
25+
status = HL_DEVICE_STATUS_MALFUNCTION;
2626
else
2727
status = HL_DEVICE_STATUS_OPERATIONAL;
2828

@@ -1092,6 +1092,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
10921092
GFP_KERNEL);
10931093
if (!hdev->kernel_ctx) {
10941094
rc = -ENOMEM;
1095+
hl_mmu_fini(hdev);
10951096
goto out_err;
10961097
}
10971098

@@ -1103,6 +1104,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
11031104
"failed to init kernel ctx in hard reset\n");
11041105
kfree(hdev->kernel_ctx);
11051106
hdev->kernel_ctx = NULL;
1107+
hl_mmu_fini(hdev);
11061108
goto out_err;
11071109
}
11081110
}

0 commit comments

Comments
 (0)