diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index dba7e680d0b2c..c4e1775e445c3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -74,6 +74,13 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } fclose(fp); } + if constexpr (std::string_view("CUDA") == "HIP") { // Check if we are RTC-compiling for HIP + if (GetProcessingSettings().hipOverrideAMDEUSperCU > 0) { + mParDevice->par_AMD_EUS_PER_CU = GetProcessingSettings().hipOverrideAMDEUSperCU; + } else if (mParDevice->par_AMD_EUS_PER_CU <= 0) { + GPUFatal("AMD_EUS_PER_CU not set in the parameters provided for the AMD GPU, you can override this via --PROChipOverrideAMDEUSperCU [n]"); + } + } const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true, mParDevice->par_AMD_EUS_PER_CU ? (mParDevice->par_AMD_EUS_PER_CU * mWarpSize) : 0) + "#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n"; if (GetProcessingSettings().rtctech.printLaunchBounds || GetProcessingSettings().debugLevel >= 3) { diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index c61056466929e..ea15ecde78c21 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -372,6 +372,7 @@ AddOption(tpcWriteClustersAfterRejection, bool, false, "", 0, "Apply TPC rejecti AddOption(oclPlatformNum, int32_t, -1, "", 0, "Platform to use, in case the backend provides multiple platforms (OpenCL only, -1 = auto-select, -2 query all platforms (also incompatible))") AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from included source code instead of using included spirv code") AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options") +AddOption(hipOverrideAMDEUSperCU, int32_t, -1, "", 0, "Override AMD_EUS_PER_CU setting") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") AddOption(tpcFreeAllocatedMemoryAfterProcessing, bool, false, "", 0, "Clean all memory allocated by TPC when TPC processing done, only data written to external output resources will remain") AddOption(debugOnFailure, int32_t, 0, "", 0, "Dump raw data in case an error occured, bit 1 enables all dumps, otherwise bitmask for: 2 = signal, 3 = GPUErrorCode", def(1))