package rosa import "hakurei.app/internal/pkg" const kernelVersion = "6.12.76" var kernelSource = pkg.NewHTTPGetTar( nil, "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/"+ "snapshot/linux-"+kernelVersion+".tar.gz", mustDecode("h0UATNznQbzplvthAqNLjVF-DJQHzGyhiy4za-9Ig9tOIpnoH9mWHbEjASV6lOl2"), pkg.TarGzip, ) func (t Toolchain) newKernelSource() (pkg.Artifact, string) { return t.New("kernel-"+kernelVersion+"-src", 0, nil, nil, nil, ` mkdir -p /work/usr/src/ cp -r /usr/src/linux /work/usr/src/ chmod -R +w /work/usr/src/linux/ `, pkg.Path(AbsUsrSrc.Append("linux"), false, kernelSource)), kernelVersion } func init() { artifactsM[KernelSource] = Metadata{ f: Toolchain.newKernelSource, Name: "kernel-source", Description: "a writable kernel source tree installed to /usr/src/linux", Website: "https://kernel.org/", } } func (t Toolchain) newKernelHeaders() (pkg.Artifact, string) { return t.NewPackage("kernel-headers", kernelVersion, kernelSource, &PackageAttr{ Flag: TEarly, }, &MakeHelper{ SkipConfigure: true, SkipCheck: true, Make: []string{ "-f /usr/src/kernel-headers/Makefile", "O=/tmp/kbuild", "LLVM=1", `HOSTLDFLAGS="${LDFLAGS:-''}"`, "INSTALL_HDR_PATH=/work/system", "headers_install", }, Install: "# headers installed during make", }, Rsync, ), kernelVersion } func init() { artifactsM[KernelHeaders] = Metadata{ f: Toolchain.newKernelHeaders, Name: "kernel-headers", Description: "an installation of kernel headers", Website: "https://kernel.org/", } } func (t Toolchain) newKernel() (pkg.Artifact, string) { return t.NewPackage("kernel", kernelVersion, kernelSource, &PackageAttr{ Env: []string{ "PATH=/system/sbin", }, ScriptEarly: ` install -Dm0400 \ /usr/src/.config \ /tmp/kbuild/.config install -Dm0500 \ /usr/src/.installkernel \ /sbin/installkernel `, Paths: []pkg.ExecPath{ pkg.Path(AbsUsrSrc.Append( ".config", ), false, pkg.NewFile(".config", kernelConfig)), pkg.Path(AbsUsrSrc.Append( ".installkernel", ), false, pkg.NewFile("installkernel", []byte(`#!/bin/sh -e echo "Installing linux $1..." cp -av "$2" "$4" cp -av "$3" "$4" `))), }, Patches: [][2]string{ {"f54a91f5337cd918eb86cf600320d25b6cfd8209", `From f54a91f5337cd918eb86cf600320d25b6cfd8209 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 13 Dec 2025 19:58:10 +0900 Subject: drm/amd/display: Reduce number of arguments of dcn30's CalculatePrefetchSchedule() After an innocuous optimization change in clang-22, dml30_ModeSupportAndSystemConfigurationFull() is over the 2048 byte stack limit for display_mode_vba_30.c. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3529:6: warning: stack frame size (2096) exceeds limit (2048) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than] 3529 | void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ With clang-21, this function was already close to the limit: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3529:6: warning: stack frame size (1912) exceeds limit (1586) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Wframe-larger-than] 3529 | void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ CalculatePrefetchSchedule() has a large number of parameters, which must be passed on the stack. Most of the parameters between the two callsites are the same, so they can be accessed through the existing mode_lib pointer, instead of being passed as explicit arguments. Doing this reduces the stack size of dml30_ModeSupportAndSystemConfigurationFull() from 2096 bytes to 1912 bytes with clang-22. Closes: https://github.com/ClangBuiltLinux/linux/issues/2117 Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher (cherry picked from commit b20b3fc4210f83089f835cdb91deec4b0778761a) --- .../amd/display/dc/dml/dcn30/display_mode_vba_30.c | 258 ++++++--------------- 1 file changed, 73 insertions(+), 185 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index abe51cf3aab2..a244504cc1f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -77,32 +77,14 @@ static unsigned int dscceComputeDelay( static unsigned int dscComputeDelay( enum output_format_class pixelFormat, enum output_encoder_class Output); -// Super monster function with some 45 argument static bool CalculatePrefetchSchedule( struct display_mode_lib *mode_lib, - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + unsigned int k, Pipe *myPipe, unsigned int DSCDelay, - double DPPCLKDelaySubtotalPlusCNVCFormater, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, - enum output_format_class OutputFormat, - unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, - unsigned int GPUVMPageTableLevels, - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - double HostVMMinPageSize, - bool DynamicMetadataEnable, - bool DynamicMetadataVMEnabled, - int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, @@ -116,7 +98,6 @@ static bool CalculatePrefetchSchedule( unsigned int MaxNumSwathY, double PrefetchSourceLinesC, unsigned int SwathWidthC, - int BytePerPixelC, double VInitPreFillC, unsigned int MaxNumSwathC, long swath_width_luma_ub, @@ -124,9 +105,6 @@ static bool CalculatePrefetchSchedule( unsigned int SwathHeightY, unsigned int SwathHeightC, double TWait, - bool ProgressiveToInterlaceUnitInOPP, - double *DSTXAfterScaler, - double *DSTYAfterScaler, double *DestinationLinesForPrefetch, double *PrefetchBandwidth, double *DestinationLinesToRequestVMInVBlank, @@ -135,14 +113,7 @@ static bool CalculatePrefetchSchedule( double *VRatioPrefetchC, double *RequiredPrefetchPixDataBWLuma, double *RequiredPrefetchPixDataBWChroma, - bool *NotEnoughTimeForDynamicMetadata, - double *Tno_bw, - double *prefetch_vmrow_bw, - double *Tdmdl_vm, - double *Tdmdl, - unsigned int *VUpdateOffsetPix, - double *VUpdateWidthPix, - double *VReadyOffsetPix); + bool *NotEnoughTimeForDynamicMetadata); static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); static void CalculateDCCConfiguration( @@ -810,29 +781,12 @@ static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum o static bool CalculatePrefetchSchedule( struct display_mode_lib *mode_lib, - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, - double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + unsigned int k, Pipe *myPipe, unsigned int DSCDelay, - double DPPCLKDelaySubtotalPlusCNVCFormater, - double DPPCLKDelaySCL, - double DPPCLKDelaySCLLBOnly, - double DPPCLKDelayCNVCCursor, - double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, - enum output_format_class OutputFormat, - unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, - unsigned int GPUVMPageTableLevels, - bool GPUVMEnable, - bool HostVMEnable, - unsigned int HostVMMaxNonCachedPageTableLevels, - double HostVMMinPageSize, - bool DynamicMetadataEnable, - bool DynamicMetadataVMEnabled, - int DynamicMetadataLinesBeforeActiveRequired, - unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, @@ -846,7 +800,6 @@ static bool CalculatePrefetchSchedule( unsigned int MaxNumSwathY, double PrefetchSourceLinesC, unsigned int SwathWidthC, - int BytePerPixelC, double VInitPreFillC, unsigned int MaxNumSwathC, long swath_width_luma_ub, @@ -854,9 +807,6 @@ static bool CalculatePrefetchSchedule( unsigned int SwathHeightY, unsigned int SwathHeightC, double TWait, - bool ProgressiveToInterlaceUnitInOPP, - double *DSTXAfterScaler, - double *DSTYAfterScaler, double *DestinationLinesForPrefetch, double *PrefetchBandwidth, double *DestinationLinesToRequestVMInVBlank, @@ -865,15 +815,10 @@ static bool CalculatePrefetchSchedule( double *VRatioPrefetchC, double *RequiredPrefetchPixDataBWLuma, double *RequiredPrefetchPixDataBWChroma, - bool *NotEnoughTimeForDynamicMetadata, - double *Tno_bw, - double *prefetch_vmrow_bw, - double *Tdmdl_vm, - double *Tdmdl, - unsigned int *VUpdateOffsetPix, - double *VUpdateWidthPix, - double *VReadyOffsetPix) + bool *NotEnoughTimeForDynamicMetadata) { + struct vba_vars_st *v = &mode_lib->vba; + double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; bool MyError = false; unsigned int DPPCycles = 0, DISPCLKCycles = 0; double DSTTotalPixelsAfterScaler = 0; @@ -905,26 +850,26 @@ static bool CalculatePrefetchSchedule( double Tdmec = 0; double Tdmsks = 0; - if (GPUVMEnable == true && HostVMEnable == true) { - HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + if (v->GPUVMEnable == true && v->HostVMEnable == true) { + HostVMInefficiencyFactor = v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; + HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; } else { HostVMInefficiencyFactor = 1; HostVMDynamicLevelsTrips = 0; } CalculateDynamicMetadataParameters( - MaxInterDCNTileRepeaters, + v->MaxInterDCNTileRepeaters, myPipe->DPPCLK, myPipe->DISPCLK, myPipe->DCFCLKDeepSleep, myPipe->PixelClock, myPipe->HTotal, myPipe->VBlank, - DynamicMetadataTransmittedBytes, - DynamicMetadataLinesBeforeActiveRequired, + v->DynamicMetadataTransmittedBytes[k], + v->DynamicMetadataLinesBeforeActiveRequired[k], myPipe->InterlaceEnable, - ProgressiveToInterlaceUnitInOPP, + v->ProgressiveToInterlaceUnitInOPP, &Tsetup, &Tdmbf, &Tdmec, @@ -932,16 +877,16 @@ static bool CalculatePrefetchSchedule( LineTime = myPipe->HTotal / myPipe->PixelClock; trip_to_mem = UrgentLatency; - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); - if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { - *Tdmdl = TWait + Tvm_trips + trip_to_mem; + if (v->DynamicMetadataVMEnabled == true && v->GPUVMEnable == true) { + v->Tdmdl[k] = TWait + Tvm_trips + trip_to_mem; } else { - *Tdmdl = TWait + UrgentExtraLatency; + v->Tdmdl[k] = TWait + UrgentExtraLatency; } - if (DynamicMetadataEnable == true) { - if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { + if (v->DynamicMetadataEnable[k] == true) { + if (VStartup * LineTime < Tsetup + v->Tdmdl[k] + Tdmbf + Tdmec + Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; } else { *NotEnoughTimeForDynamicMetadata = false; @@ -949,39 +894,39 @@ static bool CalculatePrefetchSchedule( dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); - dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); + dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", v->Tdmdl[k]); } } else { *NotEnoughTimeForDynamicMetadata = false; } - *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); + v->Tdmdl_vm[k] = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && v->GPUVMEnable == true ? TWait + Tvm_trips : 0); if (myPipe->ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; else - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; - DISPCLKCycles = DISPCLKDelaySubtotal; + DISPCLKCycles = v->DISPCLKDelaySubtotal; if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) return true; - *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + v->DSTXAfterScaler[k] = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; - *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; + v->DSTXAfterScaler[k] = v->DSTXAfterScaler[k] + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; - if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) - *DSTYAfterScaler = 1; + if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && v->ProgressiveToInterlaceUnitInOPP)) + v->DSTYAfterScaler[k] = 1; else - *DSTYAfterScaler = 0; + v->DSTYAfterScaler[k] = 0; - DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + DSTTotalPixelsAfterScaler = v->DSTYAfterScaler[k] * myPipe->HTotal + v->DSTXAfterScaler[k]; + v->DSTYAfterScaler[k] = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + v->DSTXAfterScaler[k] = DSTTotalPixelsAfterScaler - ((double) (v->DSTYAfterScaler[k] * myPipe->HTotal)); MyError = false; @@ -990,32 +935,32 @@ static bool CalculatePrefetchSchedule( Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; - if (GPUVMEnable) { - if (GPUVMPageTableLevels >= 3) { - *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); + if (v->GPUVMEnable) { + if (v->GPUVMMaxPageTableLevels >= 3) { + v->Tno_bw[k] = UrgentExtraLatency + trip_to_mem * ((v->GPUVMMaxPageTableLevels - 2) - 1); } else - *Tno_bw = 0; + v->Tno_bw[k] = 0; } else if (!myPipe->DCCEnable) - *Tno_bw = LineTime; + v->Tno_bw[k] = LineTime; else - *Tno_bw = LineTime / 4; + v->Tno_bw[k] = LineTime / 4; - dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, v->Tdmdl[k])) / LineTime + - (v->DSTYAfterScaler[k] + v->DSTXAfterScaler[k] / myPipe->HTotal); Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC); Tsw_oto = Lsw_oto * LineTime; - prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto; + prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) / Tsw_oto; - if (GPUVMEnable == true) { - Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, + if (v->GPUVMEnable == true) { + Tvm_oto = dml_max3(v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); } else Tvm_oto = LineTime / 4.0; - if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { + if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { Tr0_oto = dml_max3( (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, LineTime - Tvm_oto, LineTime / 4); @@ -1041,10 +986,10 @@ static bool CalculatePrefetchSchedule( dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); - dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); - dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); - dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); - dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler); + dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", v->Tdmdl_vm[k]); + dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", v->Tdmdl[k]); + dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", v->DSTXAfterScaler[k]); + dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)v->DSTYAfterScaler[k]); *PrefetchBandwidth = 0; *DestinationLinesToRequestVMInVBlank = 0; @@ -1058,26 +1003,26 @@ static bool CalculatePrefetchSchedule( double PrefetchBandwidth3 = 0; double PrefetchBandwidth4 = 0; - if (Tpre_rounded - *Tno_bw > 0) + if (Tpre_rounded - v->Tno_bw[k] > 0) PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY - + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) - / (Tpre_rounded - *Tno_bw); + + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) + / (Tpre_rounded - v->Tno_bw[k]); else PrefetchBandwidth1 = 0; - if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) { - PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw); + if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - v->Tno_bw[k]) > 0) { + PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - v->Tno_bw[k]); } - if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) + if (Tpre_rounded - v->Tno_bw[k] - 2 * Tr0_trips_rounded > 0) PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * - BytePerPixelC) / - (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); + v->BytePerPixelC[k]) / + (Tpre_rounded - v->Tno_bw[k] - 2 * Tr0_trips_rounded); else PrefetchBandwidth2 = 0; @@ -1085,7 +1030,7 @@ static bool CalculatePrefetchSchedule( PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * - swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded - + swath_width_chroma_ub * v->BytePerPixelC[k]) / (Tpre_rounded - Tvm_trips_rounded); else PrefetchBandwidth3 = 0; @@ -1095,7 +1040,7 @@ static bool CalculatePrefetchSchedule( } if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) - PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) + PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * v->BytePerPixelC[k]) / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); else PrefetchBandwidth4 = 0; @@ -1106,7 +1051,7 @@ static bool CalculatePrefetchSchedule( bool Case3OK; if (PrefetchBandwidth1 > 0) { - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { Case1OK = true; } else { @@ -1117,7 +1062,7 @@ static bool CalculatePrefetchSchedule( } if (PrefetchBandwidth2 > 0) { - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { Case2OK = true; } else { @@ -1128,7 +1073,7 @@ static bool CalculatePrefetchSchedule( } if (PrefetchBandwidth3 > 0) { - if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 + if (v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { Case3OK = true; } else { @@ -1151,13 +1096,13 @@ static bool CalculatePrefetchSchedule( dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ); if (prefetch_bw_equ > 0) { - if (GPUVMEnable) { - Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); + if (v->GPUVMEnable) { + Tvm_equ = dml_max3(v->Tno_bw[k] + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); } else { Tvm_equ = LineTime / 4; } - if ((GPUVMEnable || myPipe->DCCEnable)) { + if ((v->GPUVMEnable || myPipe->DCCEnable)) { Tr0_equ = dml_max4( (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, @@ -1226,7 +1171,7 @@ static bool CalculatePrefetchSchedule( } *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime; - *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime; + *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * v->BytePerPixelC[k] * swath_width_chroma_ub / LineTime; } else { MyError = true; dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); @@ -1242,9 +1187,9 @@ static bool CalculatePrefetchSchedule( dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime); - dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); + dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (v->DSTYAfterScaler[k] + ((v->DSTXAfterScaler[k]) / (double) myPipe->HTotal)) * LineTime); dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n"); - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (v->DSTYAfterScaler[k] + ((v->DSTXAfterScaler[k]) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); } else { @@ -1275,7 +1220,7 @@ static bool CalculatePrefetchSchedule( dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); } - *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); + v->prefetch_vmrow_bw[k] = dml_max(prefetch_vm_bw, prefetch_row_bw); } if (MyError) { @@ -2448,30 +2393,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->ErrorResult[k] = CalculatePrefetchSchedule( mode_lib, - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + k, &myPipe, v->DSCDelay[k], - v->DPPCLKDelaySubtotal - + v->DPPCLKDelayCNVCFormater, - v->DPPCLKDelaySCL, - v->DPPCLKDelaySCLLBOnly, - v->DPPCLKDelayCNVCCursor, - v->DISPCLKDelaySubtotal, (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), - v->OutputFormat[k], - v->MaxInterDCNTileRepeaters, dml_min(v->VStartupLines, v->MaxVStartupLines[k]), v->MaxVStartupLines[k], - v->GPUVMMaxPageTableLevels, - v->GPUVMEnable, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->HostVMMinPageSize, - v->DynamicMetadataEnable[k], - v->DynamicMetadataVMEnabled, - v->DynamicMetadataLinesBeforeActiveRequired[k], - v->DynamicMetadataTransmittedBytes[k], v->UrgentLatency, v->UrgentExtraLatency, v->TCalc, @@ -2485,7 +2412,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->MaxNumSwathY[k], v->PrefetchSourceLinesC[k], v->SwathWidthC[k], - v->BytePerPixelC[k], v->VInitPreFillC[k], v->MaxNumSwathC[k], v->swath_width_luma_ub[k], @@ -2493,9 +2419,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->SwathHeightY[k], v->SwathHeightC[k], TWait, - v->ProgressiveToInterlaceUnitInOPP, - &v->DSTXAfterScaler[k], - &v->DSTYAfterScaler[k], &v->DestinationLinesForPrefetch[k], &v->PrefetchBandwidth[k], &v->DestinationLinesToRequestVMInVBlank[k], @@ -2504,14 +2427,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman &v->VRatioPrefetchC[k], &v->RequiredPrefetchPixDataBWLuma[k], &v->RequiredPrefetchPixDataBWChroma[k], - &v->NotEnoughTimeForDynamicMetadata[k], - &v->Tno_bw[k], - &v->prefetch_vmrow_bw[k], - &v->Tdmdl_vm[k], - &v->Tdmdl[k], - &v->VUpdateOffsetPix[k], - &v->VUpdateWidthPix[k], - &v->VReadyOffsetPix[k]); + &v->NotEnoughTimeForDynamicMetadata[k]); if (v->BlendingAndTiming[k] == k) { double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK); v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k]; @@ -4781,29 +4697,12 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( mode_lib, - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, - v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, + k, &myPipe, v->DSCDelayPerState[i][k], - v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, - v->DPPCLKDelaySCL, - v->DPPCLKDelaySCLLBOnly, - v->DPPCLKDelayCNVCCursor, - v->DISPCLKDelaySubtotal, v->SwathWidthYThisState[k] / v->HRatio[k], - v->OutputFormat[k], - v->MaxInterDCNTileRepeaters, dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), v->MaximumVStartup[i][j][k], - v->GPUVMMaxPageTableLevels, - v->GPUVMEnable, - v->HostVMEnable, - v->HostVMMaxNonCachedPageTableLevels, - v->HostVMMinPageSize, - v->DynamicMetadataEnable[k], - v->DynamicMetadataVMEnabled, - v->DynamicMetadataLinesBeforeActiveRequired[k], - v->DynamicMetadataTransmittedBytes[k], v->UrgLatency[i], v->ExtraLatency, v->TimeCalc, @@ -4817,7 +4716,6 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->MaxNumSwY[k], v->PrefetchLinesC[i][j][k], v->SwathWidthCThisState[k], - v->BytePerPixelC[k], v->PrefillC[k], v->MaxNumSwC[k], v->swath_width_luma_ub_this_state[k], @@ -4825,9 +4723,6 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->TWait, - v->ProgressiveToInterlaceUnitInOPP, - &v->DSTXAfterScaler[k], - &v->DSTYAfterScaler[k], &v->LineTimesForPrefetch[k], &v->PrefetchBW[k], &v->LinesForMetaPTE[k], @@ -4836,14 +4731,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &v->VRatioPreC[i][j][k], &v->RequiredPrefetchPixelDataBWLuma[i][j][k], &v->RequiredPrefetchPixelDataBWChroma[i][j][k], - &v->NoTimeForDynamicMetadata[i][j][k], - &v->Tno_bw[k], - &v->prefetch_vmrow_bw[k], - &v->Tdmdl_vm[k], - &v->Tdmdl[k], - &v->VUpdateOffsetPix[k], - &v->VUpdateWidthPix[k], - &v->VReadyOffsetPix[k]); + &v->NoTimeForDynamicMetadata[i][j][k]); } for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { -- cgit 1.2.3-korg `}, {"6ce6fbfddc5b127e4f57c3b5bfdcf40239a4fc2f", `From 6ce6fbfddc5b127e4f57c3b5bfdcf40239a4fc2f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 13 Dec 2025 19:58:11 +0900 Subject: drm/amd/display: Reduce number of arguments of dcn30's CalculateWatermarksAndDRAMSpeedChangeSupport() CalculateWatermarksAndDRAMSpeedChangeSupport() has a large number of parameters, which must be passed on the stack. Most of the parameters between the two callsites are the same, so they can be accessed through the existing mode_lib pointer, instead of being passed as explicit arguments. Doing this reduces the stack size of dml30_ModeSupportAndSystemConfigurationFull() from 1912 bytes to 1840 bytes building for x86_64 with clang-22, helping stay under the 2048 byte limit for display_mode_vba_30.c. Additionally, now that there is a pointer to mode_lib->vba available, use 'v' consistently throughout the entire function. Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher (cherry picked from commit 563dfbefdf633c8d958398ddfa3955f9f40e47d9) --- .../amd/display/dc/dml/dcn30/display_mode_vba_30.c | 287 +++++---------------- 1 file changed, 66 insertions(+), 221 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 2d19bb8de59c84..1df3412be3465d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -265,62 +265,23 @@ static void CalculateDynamicMetadataParameters( static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, - unsigned int NumberOfActivePlanes, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int DPPOutputBufferPixels, - unsigned int DETBufferSizeInKByte, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool GPUVMEnable, - unsigned int dpte_group_bytes[], - unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, - double WritebackLatency, - double WritebackChunkSize, double SOCCLK, - double DRAMClockChangeLatency, - double SRExitTime, - double SREnterPlusExitTime, double DCFCLKDeepSleep, unsigned int DPPPerPlane[], - bool DCCEnable[], double DPPCLK[], unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int vtaps[], - unsigned int VTAPsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - double PixelClock[], - unsigned int BlendingAndTiming[], double BytePerPixelDETY[], double BytePerPixelDETC[], - double DSTXAfterScaler[], - double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], - enum clock_change_support *DRAMClockChangeSupport, - double *UrgentWatermark, - double *WritebackUrgentWatermark, - double *DRAMClockChangeWatermark, - double *WritebackDRAMClockChangeWatermark, - double *StutterExitWatermark, - double *StutterEnterPlusExitWatermark, - double *MinActiveDRAMClockChangeLatencySupported); + enum clock_change_support *DRAMClockChangeSupport); static void CalculateDCFCLKDeepSleep( struct display_mode_lib *mode_lib, unsigned int NumberOfActivePlanes, @@ -2646,62 +2607,23 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, PrefetchMode, - v->NumberOfActivePlanes, - v->MaxLineBufferLines, - v->LineBufferSize, - v->DPPOutputBufferPixels, - v->DETBufferSizeInKByte[0], - v->WritebackInterfaceBufferSize, v->DCFCLK, v->ReturnBW, - v->GPUVMEnable, - v->dpte_group_bytes, - v->MetaChunkSize, v->UrgentLatency, v->UrgentExtraLatency, - v->WritebackLatency, - v->WritebackChunkSize, v->SOCCLK, - v->FinalDRAMClockChangeLatency, - v->SRExitTime, - v->SREnterPlusExitTime, v->DCFCLKDeepSleep, v->DPPPerPlane, - v->DCCEnable, v->DPPCLK, v->DETBufferSizeY, v->DETBufferSizeC, v->SwathHeightY, v->SwathHeightC, - v->LBBitPerPixel, v->SwathWidthY, v->SwathWidthC, - v->HRatio, - v->HRatioChroma, - v->vtaps, - v->VTAPsChroma, - v->VRatio, - v->VRatioChroma, - v->HTotal, - v->PixelClock, - v->BlendingAndTiming, v->BytePerPixelDETY, v->BytePerPixelDETC, - v->DSTXAfterScaler, - v->DSTYAfterScaler, - v->WritebackEnable, - v->WritebackPixelFormat, - v->WritebackDestinationWidth, - v->WritebackDestinationHeight, - v->WritebackSourceHeight, - &DRAMClockChangeSupport, - &v->UrgentWatermark, - &v->WritebackUrgentWatermark, - &v->DRAMClockChangeWatermark, - &v->WritebackDRAMClockChangeWatermark, - &v->StutterExitWatermark, - &v->StutterEnterPlusExitWatermark, - &v->MinActiveDRAMClockChangeLatencySupported); + &DRAMClockChangeSupport); for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->WritebackEnable[k] == true) { @@ -4895,62 +4817,23 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, v->PrefetchModePerState[i][j], - v->NumberOfActivePlanes, - v->MaxLineBufferLines, - v->LineBufferSize, - v->DPPOutputBufferPixels, - v->DETBufferSizeInKByte[0], - v->WritebackInterfaceBufferSize, v->DCFCLKState[i][j], v->ReturnBWPerState[i][j], - v->GPUVMEnable, - v->dpte_group_bytes, - v->MetaChunkSize, v->UrgLatency[i], v->ExtraLatency, - v->WritebackLatency, - v->WritebackChunkSize, v->SOCCLKPerState[i], - v->FinalDRAMClockChangeLatency, - v->SRExitTime, - v->SREnterPlusExitTime, v->ProjectedDCFCLKDeepSleep[i][j], v->NoOfDPPThisState, - v->DCCEnable, v->RequiredDPPCLKThisState, v->DETBufferSizeYThisState, v->DETBufferSizeCThisState, v->SwathHeightYThisState, v->SwathHeightCThisState, - v->LBBitPerPixel, v->SwathWidthYThisState, v->SwathWidthCThisState, - v->HRatio, - v->HRatioChroma, - v->vtaps, - v->VTAPsChroma, - v->VRatio, - v->VRatioChroma, - v->HTotal, - v->PixelClock, - v->BlendingAndTiming, v->BytePerPixelInDETY, v->BytePerPixelInDETC, - v->DSTXAfterScaler, - v->DSTYAfterScaler, - v->WritebackEnable, - v->WritebackPixelFormat, - v->WritebackDestinationWidth, - v->WritebackDestinationHeight, - v->WritebackSourceHeight, - &v->DRAMClockChangeSupport[i][j], - &v->UrgentWatermark, - &v->WritebackUrgentWatermark, - &v->DRAMClockChangeWatermark, - &v->WritebackDRAMClockChangeWatermark, - &v->StutterExitWatermark, - &v->StutterEnterPlusExitWatermark, - &v->MinActiveDRAMClockChangeLatencySupported); + &v->DRAMClockChangeSupport[i][j]); } } @@ -5067,63 +4950,25 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, - unsigned int NumberOfActivePlanes, - unsigned int MaxLineBufferLines, - unsigned int LineBufferSize, - unsigned int DPPOutputBufferPixels, - unsigned int DETBufferSizeInKByte, - unsigned int WritebackInterfaceBufferSize, double DCFCLK, double ReturnBW, - bool GPUVMEnable, - unsigned int dpte_group_bytes[], - unsigned int MetaChunkSize, double UrgentLatency, double ExtraLatency, - double WritebackLatency, - double WritebackChunkSize, double SOCCLK, - double DRAMClockChangeLatency, - double SRExitTime, - double SREnterPlusExitTime, double DCFCLKDeepSleep, unsigned int DPPPerPlane[], - bool DCCEnable[], double DPPCLK[], unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], - unsigned int LBBitPerPixel[], double SwathWidthY[], double SwathWidthC[], - double HRatio[], - double HRatioChroma[], - unsigned int vtaps[], - unsigned int VTAPsChroma[], - double VRatio[], - double VRatioChroma[], - unsigned int HTotal[], - double PixelClock[], - unsigned int BlendingAndTiming[], double BytePerPixelDETY[], double BytePerPixelDETC[], - double DSTXAfterScaler[], - double DSTYAfterScaler[], - bool WritebackEnable[], - enum source_format_class WritebackPixelFormat[], - double WritebackDestinationWidth[], - double WritebackDestinationHeight[], - double WritebackSourceHeight[], - enum clock_change_support *DRAMClockChangeSupport, - double *UrgentWatermark, - double *WritebackUrgentWatermark, - double *DRAMClockChangeWatermark, - double *WritebackDRAMClockChangeWatermark, - double *StutterExitWatermark, - double *StutterEnterPlusExitWatermark, - double *MinActiveDRAMClockChangeLatencySupported) + enum clock_change_support *DRAMClockChangeSupport) { + struct vba_vars_st *v = &mode_lib->vba; double EffectiveLBLatencyHidingY = 0; double EffectiveLBLatencyHidingC = 0; double LinesInDETY[DC__NUM_DPP__MAX] = { 0 }; @@ -5142,101 +4987,101 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double WritebackDRAMClockChangeLatencyHiding = 0; unsigned int k, j; - mode_lib->vba.TotalActiveDPP = 0; - mode_lib->vba.TotalDCCActiveDPP = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k]; - if (DCCEnable[k] == true) { - mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k]; + v->TotalActiveDPP = 0; + v->TotalDCCActiveDPP = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + v->TotalActiveDPP = v->TotalActiveDPP + DPPPerPlane[k]; + if (v->DCCEnable[k] == true) { + v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + DPPPerPlane[k]; } } - *UrgentWatermark = UrgentLatency + ExtraLatency; + v->UrgentWatermark = UrgentLatency + ExtraLatency; - *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; + v->DRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->UrgentWatermark; - mode_lib->vba.TotalActiveWriteback = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (WritebackEnable[k] == true) { - mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1; + v->TotalActiveWriteback = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k] == true) { + v->TotalActiveWriteback = v->TotalActiveWriteback + 1; } } - if (mode_lib->vba.TotalActiveWriteback <= 1) { - *WritebackUrgentWatermark = WritebackLatency; + if (v->TotalActiveWriteback <= 1) { + v->WritebackUrgentWatermark = v->WritebackLatency; } else { - *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } - if (mode_lib->vba.TotalActiveWriteback <= 1) { - *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; + if (v->TotalActiveWriteback <= 1) { + v->WritebackDRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->WritebackLatency; } else { - *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + v->WritebackDRAMClockChangeWatermark = v->FinalDRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; } - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { - mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); + v->LBLatencyHidingSourceLinesY = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); - mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); + v->LBLatencyHidingSourceLinesC = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); - EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); - EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k]; LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; if (BytePerPixelDETC[k] > 0) { - LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; } else { LinesInDETC = 0; FullDETBufferingTimeC = 999999; } - ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark; + ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - v->UrgentWatermark - (v->HTotal[k] / v->PixelClock[k]) * (v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) - v->DRAMClockChangeWatermark; - if (NumberOfActivePlanes > 1) { - ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; + if (v->NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; } if (BytePerPixelDETC[k] > 0) { - ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark; + ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - v->UrgentWatermark - (v->HTotal[k] / v->PixelClock[k]) * (v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) - v->DRAMClockChangeWatermark; - if (NumberOfActivePlanes > 1) { - ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; + if (v->NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; } - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); + v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); } else { - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; + v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; } - if (WritebackEnable[k] == true) { + if (v->WritebackEnable[k] == true) { - WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); - if (WritebackPixelFormat[k] == dm_444_64) { + WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); + if (v->WritebackPixelFormat[k] == dm_444_64) { WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; } - if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) { + if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) { WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2; } - WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark; - mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); + WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; + v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); } } - mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999; + v->MinActiveDRAMClockChangeMargin = 999999; PlaneWithMinActiveDRAMClockChangeMargin = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) { - mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; - if (BlendingAndTiming[k] == k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { + v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; + if (v->BlendingAndTiming[k] == k) { PlaneWithMinActiveDRAMClockChangeMargin = k; } else { - for (j = 0; j < NumberOfActivePlanes; ++j) { - if (BlendingAndTiming[k] == j) { + for (j = 0; j < v->NumberOfActivePlanes; ++j) { + if (v->BlendingAndTiming[k] == j) { PlaneWithMinActiveDRAMClockChangeMargin = j; } } @@ -5244,40 +5089,40 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( } } - *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; + v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->FinalDRAMClockChangeLatency; SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { - SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; } } - mode_lib->vba.TotalNumberOfActiveOTG = 0; - for (k = 0; k < NumberOfActivePlanes; ++k) { - if (BlendingAndTiming[k] == k) { - mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1; + v->TotalNumberOfActiveOTG = 0; + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->BlendingAndTiming[k] == k) { + v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; } } - if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { + if (v->MinActiveDRAMClockChangeMargin > 0) { *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) { + } else if (((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) { *DRAMClockChangeSupport = dm_dram_clock_change_vblank; } else { *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0]; - for (k = 0; k < NumberOfActivePlanes; ++k) { + for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) { FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k]; - TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k]; + TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; } } - *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; - *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane); + v->StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; + v->StutterEnterPlusExitWatermark = dml_max(v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane); } -- cgit 1.2.3-korg `}, }, Flag: TExclusive, }, &MakeHelper{ OmitDefaults: true, SkipConfigure: true, // Build, install, and boot kernel before running kselftest on it. SkipCheck: true, Make: []string{ "-f /usr/src/kernel/Makefile", "O=/tmp/kbuild", "LLVM=1", "KBUILD_BUILD_VERSION='1-Rosa'", "KBUILD_BUILD_TIMESTAMP='2106-02-07 06:28:15 UTC'", "KBUILD_BUILD_USER=kbuild", "KBUILD_BUILD_HOST=localhost", "all", }, Install: ` make \ "-j$(nproc)" \ -f /usr/src/kernel/Makefile \ O=/tmp/kbuild \ LLVM=1 \ INSTALL_PATH=/work \ install \ INSTALL_MOD_PATH=/work \ modules_install rm -v /work/lib/modules/` + kernelVersion + `/build `, }, Flex, Bison, M4, Tar, Perl, BC, Sed, Gawk, Coreutils, Diffutils, Python, XZ, Zlib, Gzip, Bzip2, Zstd, Kmod, Elfutils, OpenSSL, UtilLinux, KernelHeaders, ), kernelVersion } func init() { artifactsM[Kernel] = Metadata{ f: Toolchain.newKernel, Name: "kernel", Description: "the generic Rosa OS linux kernel", Website: "https://kernel.org/", ID: 375621, } } func (t Toolchain) newGenInitCPIO() (pkg.Artifact, string) { return t.New("gen_init_cpio-"+kernelVersion, 0, nil, nil, nil, ` mkdir -p /work/system/bin/ cc -o /work/system/bin/gen_init_cpio /usr/src/linux/usr/gen_init_cpio.c `, pkg.Path(AbsUsrSrc.Append("linux"), false, kernelSource)), kernelVersion } func init() { artifactsM[GenInitCPIO] = Metadata{ f: Toolchain.newGenInitCPIO, Name: "gen_init_cpio", Description: "a program in the kernel source tree for creating initramfs archive", } }