Skip to content

Commit d978c51

Browse files
Alvin Lee authored and alexdeucher committed
drm/amd/display: Refactor SubVP calculation to remove FPU
Refactor calculation to remove floating point operations from dmub_srv. To ensure that 32-bit compilation works well, we use the div64 family of macros to do integer division for SubVP-related timing parameters.

Cc: Maíra Canal <[email protected]>
Cc: Alex Deucher <[email protected]>
Cc: Isabella Basso <[email protected]>
Cc: Magali Lemes <[email protected]>
Tested-by: Daniel Wheeler <[email protected]>
Reviewed-by: Samson Tam <[email protected]>
Acked-by: Tom Chung <[email protected]>
Signed-off-by: Alvin Lee <[email protected]>
Co-developed-by: Aurabindo Pillai <[email protected]>
Signed-off-by: Aurabindo Pillai <[email protected]>
Co-developed-by: Rodrigo Siqueira <[email protected]>
Signed-off-by: Rodrigo Siqueira <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 3601d62 commit d978c51

File tree

1 file changed

+44
-45
lines changed

1 file changed

+44
-45
lines changed

drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c

Lines changed: 44 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -417,44 +417,42 @@ static void populate_subvp_cmd_drr_info(struct dc *dc,
417417
struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing;
418418
struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
419419
struct dc_crtc_timing *drr_timing = &vblank_pipe->stream->timing;
420-
int16_t drr_frame_us = 0;
421-
int16_t min_drr_supported_us = 0;
422-
int16_t max_drr_supported_us = 0;
423-
int16_t max_drr_vblank_us = 0;
424-
int16_t max_drr_mallregion_us = 0;
425-
int16_t mall_region_us = 0;
426-
int16_t prefetch_us = 0;
427-
int16_t subvp_active_us = 0;
428-
int16_t drr_active_us = 0;
429-
int16_t min_vtotal_supported = 0;
430-
int16_t max_vtotal_supported = 0;
420+
uint16_t drr_frame_us = 0;
421+
uint16_t min_drr_supported_us = 0;
422+
uint16_t max_drr_supported_us = 0;
423+
uint16_t max_drr_vblank_us = 0;
424+
uint16_t max_drr_mallregion_us = 0;
425+
uint16_t mall_region_us = 0;
426+
uint16_t prefetch_us = 0;
427+
uint16_t subvp_active_us = 0;
428+
uint16_t drr_active_us = 0;
429+
uint16_t min_vtotal_supported = 0;
430+
uint16_t max_vtotal_supported = 0;
431431

432432
pipe_data->pipe_config.vblank_data.drr_info.drr_in_use = true;
433433
pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for now don't use ramping
434434
pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // hardcode 4ms DRR window for now
435435

436-
drr_frame_us = div64_s64(drr_timing->v_total * drr_timing->h_total,
437-
(int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000);
436+
drr_frame_us = div64_u64(((uint64_t)drr_timing->v_total * drr_timing->h_total * 1000000),
437+
(((uint64_t)drr_timing->pix_clk_100hz * 100)));
438438
// P-State allow width and FW delays already included phantom_timing->v_addressable
439-
mall_region_us = div64_s64(phantom_timing->v_addressable * phantom_timing->h_total,
440-
(int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000);
439+
mall_region_us = div64_u64(((uint64_t)phantom_timing->v_addressable * phantom_timing->h_total * 1000000),
440+
(((uint64_t)phantom_timing->pix_clk_100hz * 100)));
441441
min_drr_supported_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
442-
min_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 *
443-
(div64_s64((int64_t)min_drr_supported_us, 1000000)),
444-
(int64_t)drr_timing->h_total);
445-
446-
prefetch_us = div64_s64((phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total,
447-
(int64_t)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
448-
dc->caps.subvp_prefetch_end_to_mall_start_us);
449-
subvp_active_us = div64_s64(main_timing->v_addressable * main_timing->h_total,
450-
(int64_t)(main_timing->pix_clk_100hz * 100) * 1000000);
451-
drr_active_us = div64_s64(drr_timing->v_addressable * drr_timing->h_total,
452-
(int64_t)(drr_timing->pix_clk_100hz * 100) * 1000000);
453-
max_drr_vblank_us = div64_s64((int64_t)(subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us;
442+
min_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * min_drr_supported_us),
443+
(((uint64_t)drr_timing->h_total * 1000000)));
444+
445+
prefetch_us = div64_u64(((uint64_t)(phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total * 1000000),
446+
(((uint64_t)phantom_timing->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
447+
subvp_active_us = div64_u64(((uint64_t)main_timing->v_addressable * main_timing->h_total * 1000000),
448+
(((uint64_t)main_timing->pix_clk_100hz * 100)));
449+
drr_active_us = div64_u64(((uint64_t)drr_timing->v_addressable * drr_timing->h_total * 1000000),
450+
(((uint64_t)drr_timing->pix_clk_100hz * 100)));
451+
max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us;
454452
max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us;
455453
max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us;
456-
max_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * (div64_s64((int64_t)max_drr_supported_us, 1000000)),
457-
(int64_t)drr_timing->h_total);
454+
max_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * max_drr_supported_us),
455+
(((uint64_t)drr_timing->h_total * 1000000)));
458456

459457
pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported;
460458
pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported;
@@ -548,27 +546,30 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
548546
struct dc_crtc_timing *phantom_timing1 = &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing;
549547
struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
550548

551-
subvp0_prefetch_us = div64_s64((phantom_timing0->v_total - phantom_timing0->v_front_porch) * phantom_timing0->h_total,
552-
(int64_t)(phantom_timing0->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us);
553-
subvp1_prefetch_us = div64_s64((phantom_timing1->v_total - phantom_timing1->v_front_porch) * phantom_timing1->h_total,
554-
(int64_t)(phantom_timing1->pix_clk_100hz * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us);
549+
subvp0_prefetch_us = div64_u64(((uint64_t)(phantom_timing0->v_total - phantom_timing0->v_front_porch) *
550+
(uint64_t)phantom_timing0->h_total * 1000000),
551+
(((uint64_t)phantom_timing0->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
552+
subvp1_prefetch_us = div64_u64(((uint64_t)(phantom_timing1->v_total - phantom_timing1->v_front_porch) *
553+
(uint64_t)phantom_timing1->h_total * 1000000),
554+
(((uint64_t)phantom_timing1->pix_clk_100hz * 100) + dc->caps.subvp_prefetch_end_to_mall_start_us));
555555

556556
// Whichever SubVP PIPE has the smaller prefetch (including the prefetch end to mall start time)
557557
// should increase it's prefetch time to match the other
558558
if (subvp0_prefetch_us > subvp1_prefetch_us) {
559559
pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[1];
560560
prefetch_delta_us = subvp0_prefetch_us - subvp1_prefetch_us;
561561
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
562-
div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) *
563-
(phantom_timing1->pix_clk_100hz * 100) + phantom_timing1->h_total - 1),
564-
(int64_t)phantom_timing1->h_total);
562+
div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) *
563+
((uint64_t)phantom_timing1->pix_clk_100hz * 100) + ((uint64_t)phantom_timing1->h_total * 1000000 - 1)),
564+
((uint64_t)phantom_timing1->h_total * 1000000));
565+
565566
} else if (subvp1_prefetch_us > subvp0_prefetch_us) {
566567
pipe_data = &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[0];
567568
prefetch_delta_us = subvp1_prefetch_us - subvp0_prefetch_us;
568569
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
569-
div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us), 1000000)) *
570-
(phantom_timing0->pix_clk_100hz * 100) + phantom_timing0->h_total - 1),
571-
(int64_t)phantom_timing0->h_total);
570+
div64_u64(((uint64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) *
571+
((uint64_t)phantom_timing0->pix_clk_100hz * 100) + ((uint64_t)phantom_timing0->h_total * 1000000 - 1)),
572+
((uint64_t)phantom_timing0->h_total * 1000000));
572573
}
573574
}
574575

@@ -630,13 +631,11 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
630631

631632
// Round up
632633
pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
633-
div64_s64(((div64_s64((int64_t)dc->caps.subvp_prefetch_end_to_mall_start_us, 1000000)) *
634-
(phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1),
635-
(int64_t)phantom_timing->h_total);
634+
div64_u64(((uint64_t)dc->caps.subvp_prefetch_end_to_mall_start_us * ((uint64_t)phantom_timing->pix_clk_100hz * 100) +
635+
((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000));
636636
pipe_data->pipe_config.subvp_data.processing_delay_lines =
637-
div64_s64(((div64_s64((int64_t)dc->caps.subvp_fw_processing_delay_us, 1000000)) *
638-
(phantom_timing->pix_clk_100hz * 100) + phantom_timing->h_total - 1),
639-
(int64_t)phantom_timing->h_total);
637+
div64_u64(((uint64_t)(dc->caps.subvp_fw_processing_delay_us) * ((uint64_t)phantom_timing->pix_clk_100hz * 100) +
638+
((uint64_t)phantom_timing->h_total * 1000000 - 1)), ((uint64_t)phantom_timing->h_total * 1000000));
640639
// Find phantom pipe index based on phantom stream
641640
for (j = 0; j < dc->res_pool->pipe_count; j++) {
642641
struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];

0 commit comments

Comments
 (0)