轉載:UE5 中的computer shader使用 - 知乎 (zhihu.com)
目標
- 通過藍圖輸入參數,經過Compute Shader做矩陣運算
流程
1. 新建插件
2. 插件設置
3. 聲明和GPU內存對齊的參數結構
4. 聲明Compute Shader結構
5. 參數綁定
6. 著色器實現
7. 分配 work groups
8. 計算和輸出
9. 額外添加參數
1. 新建插件
新建空白插件即可,按照正常的插件創建流程操作,詳見官方文檔。
2. 插件設置
XXX.Build.cs
// Modules the plugin links against privately; the list below is what this tutorial's plugin declares.
PrivateDependencyModuleNames.AddRange(new string[]
{
    "CoreUObject",
    "Engine",
    "Renderer",
    "RenderCore",
    "RHI",
    "Projects"
    // ... add private dependencies that you statically link with here ...
});
XXX.uplugin
"Modules": [{"Name": "CS_Test","Type": "Runtime","LoadingPhase": "PostConfigInit"}]
3. 聲明和GPU內存對齊的參數結構
/**
 * CPU-side parameters for one dispatch of the compute shader.
 *
 * X/Y/Z are set by the constructor. Input holds the two integer operands and
 * Output is a slot for a result value; both are zero-initialised so the struct
 * never carries indeterminate values.
 */
struct CS_TEST_API FMySimpleComputeShaderDispatchParams
{
    int X;
    int Y;
    int Z;

    // The two integer operands supplied by the caller.
    int Input[2] = {0, 0};
    // Result slot, zero until something writes it.
    int Output = 0;

    /**
     * @param x Stored in X.
     * @param y Stored in Y.
     * @param z Stored in Z.
     */
    FMySimpleComputeShaderDispatchParams(int x, int y, int z)
        : X(x)
        , Y(y)
        , Z(z)
    {
    }
};
4. 聲明Compute Shader結構和參數綁定
MySimpleComputeShader.cpp
#include "MySimpleComputeShader.h"
#include "../../../Shaders/Public/MySimpleComputeShader.h"
#include "PixelShaderUtils.h"
#include "RenderCore/Public/RenderGraphUtils.h"
#include "MeshPassProcessor.inl"
#include "StaticMeshResources.h"
#include "DynamicMeshBuilder.h"
#include "RenderGraphResources.h"
#include "GlobalShader.h"
#include "UnifiedBuffer.h"
#include "CanvasTypes.h"
#include "MaterialShader.h"DECLARE_STATS_GROUP(TEXT("MySimpleComputeShader"), STATGROUP_MySimpleComputeShader, STATCAT_Advanced);
DECLARE_CYCLE_STAT(TEXT("MySimpleComputeShader Execute"), STAT_MySimpleComputeShader_Execute, STATGROUP_MySimpleComputeShader);// This class carries our parameter declarations and acts as the bridge between cpp and HLSL.
/**
 * Global shader type for the compute shader.
 *
 * Declares the shader's parameter struct (a read-only int buffer `Input` and a
 * writable int buffer `Output`), one integer permutation dimension ("TEST"),
 * and the compile-time defines THREADS_X / THREADS_Y / THREADS_Z.
 */
class CS_TEST_API FMySimpleComputeShader : public FGlobalShader
{
public:
    DECLARE_GLOBAL_SHADER(FMySimpleComputeShader);
    SHADER_USE_PARAMETER_STRUCT(FMySimpleComputeShader, FGlobalShader);

    // Single permutation dimension named "TEST" with one possible value.
    class FMySimpleComputeShader_Perm_TEST : SHADER_PERMUTATION_INT("TEST", 1);
    using FPermutationDomain = TShaderPermutationDomain<FMySimpleComputeShader_Perm_TEST>;

    BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
        /*
         * Here's where you define one or more of the input parameters for your shader.
         * Some examples:
         */
        // SHADER_PARAMETER(uint32, MyUint32) // On the shader side: uint32 MyUint32;
        // SHADER_PARAMETER(FVector3f, MyVector) // On the shader side: float3 MyVector;

        // SHADER_PARAMETER_TEXTURE(Texture2D, MyTexture) // On the shader side: Texture2D<float4> MyTexture; (float4 should be whatever you expect each pixel in the texture to be, in this case float4(R,G,B,A) for 4 channels)
        // SHADER_PARAMETER_SAMPLER(SamplerState, MyTextureSampler) // On the shader side: SamplerState MySampler; // CPP side: TStaticSamplerState<ESamplerFilter::SF_Bilinear>::GetRHI();

        // SHADER_PARAMETER_ARRAY(float, MyFloatArray, [3]) // On the shader side: float MyFloatArray[3];

        // SHADER_PARAMETER_UAV(RWTexture2D<FVector4f>, MyTextureUAV) // On the shader side: RWTexture2D<float4> MyTextureUAV;
        // SHADER_PARAMETER_UAV(RWStructuredBuffer<FMyCustomStruct>, MyCustomStructs) // On the shader side: RWStructuredBuffer<FMyCustomStruct> MyCustomStructs;
        // SHADER_PARAMETER_UAV(RWBuffer<FMyCustomStruct>, MyCustomStructs) // On the shader side: RWBuffer<FMyCustomStruct> MyCustomStructs;

        // SHADER_PARAMETER_SRV(StructuredBuffer<FMyCustomStruct>, MyCustomStructs) // On the shader side: StructuredBuffer<FMyCustomStruct> MyCustomStructs;
        // SHADER_PARAMETER_SRV(Buffer<FMyCustomStruct>, MyCustomStructs) // On the shader side: Buffer<FMyCustomStruct> MyCustomStructs;
        // SHADER_PARAMETER_SRV(Texture2D<FVector4f>, MyReadOnlyTexture) // On the shader side: Texture2D<float4> MyReadOnlyTexture;

        // SHADER_PARAMETER_STRUCT_REF(FMyCustomStruct, MyCustomStruct)

        // The parameters this shader actually uses.
        SHADER_PARAMETER_RDG_BUFFER_SRV(Buffer<int>, Input)
        SHADER_PARAMETER_RDG_BUFFER_UAV(RWBuffer<int>, Output)
    END_SHADER_PARAMETER_STRUCT()

public:
    /** Every permutation is compiled; the permutation vector is built but not consulted. */
    static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
    {
        const FPermutationDomain PermutationVector(Parameters.PermutationId);

        return true;
    }

    /** Adds the thread-count defines on top of the base class's compilation environment. */
    static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
    {
        FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);

        const FPermutationDomain PermutationVector(Parameters.PermutationId);

        /*
         * Here you define constants that can be used statically in the shader code.
         * Example:
         */
        // OutEnvironment.SetDefine(TEXT("MY_CUSTOM_CONST"), TEXT("1"));

        /*
         * These defines are used in the thread count section of our shader
         */
        OutEnvironment.SetDefine(TEXT("THREADS_X"), NUM_THREADS_MySimpleComputeShader_X);
        OutEnvironment.SetDefine(TEXT("THREADS_Y"), NUM_THREADS_MySimpleComputeShader_Y);
        OutEnvironment.SetDefine(TEXT("THREADS_Z"), NUM_THREADS_MySimpleComputeShader_Z);

        // This shader must support typed UAV load and we are testing if it is supported at runtime using RHIIsTypedUAVLoadSupported
        //OutEnvironment.CompilerFlags.Add(CFLAG_AllowTypedUAVLoads);

        // FForwardLightingParameters::ModifyCompilationEnvironment(Parameters.Platform, OutEnvironment);
    }
};

// This will tell the engine to create the shader and where the shader entry point is.
// ShaderType ShaderPath Shader function name Type
IMPLEMENT_GLOBAL_SHADER(FMySimpleComputeShader, "/Plugin/CS_Test/Private/MySimpleComputeShader.usf", "MySimpleComputeShader", SF_Compute);void FMySimpleComputeShaderInterface::DispatchRenderThread(FRHICommandListImmediate& RHICmdList, FMySimpleComputeShaderDispatchParams Params, TFunction<void(int OutputVal)> AsyncCallback) {FRDGBuilder GraphBuilder(RHICmdList);{SCOPE_CYCLE_COUNTER(STAT_MySimpleComputeShader_Execute);DECLARE_GPU_STAT(MySimpleComputeShader)RDG_EVENT_SCOPE(GraphBuilder, "MySimpleComputeShader");RDG_GPU_STAT_SCOPE(GraphBuilder, MySimpleComputeShader);typename FMySimpleComputeShader::FPermutationDomain PermutationVector;// Add any static permutation options here// PermutationVector.Set<FMySimpleComputeShader::FMyPermutationName>(12345);TShaderMapRef<FMySimpleComputeShader> ComputeShader(GetGlobalShaderMap(GMaxRHIFeatureLevel), PermutationVector);bool bIsShaderValid = ComputeShader.IsValid();if (bIsShaderValid) {FMySimpleComputeShader::FParameters* PassParameters = GraphBuilder.AllocParameters<FMySimpleComputeShader::FParameters>();const void* RawData = (void*)Params.Input;int NumInputs = 2;int InputSize = sizeof(int);FRDGBufferRef InputBuffer = CreateUploadBuffer(GraphBuilder, TEXT("InputBuffer"), InputSize, NumInputs, RawData, InputSize * NumInputs);PassParameters->Input = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(InputBuffer, PF_R32_SINT));FRDGBufferRef OutputBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateBufferDesc(sizeof(int32), 1),TEXT("OutputBuffer"));PassParameters->Output = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(OutputBuffer, PF_R32_SINT));auto GroupCount = FComputeShaderUtils::GetGroupCount(FIntVector(Params.X, Params.Y, Params.Z), FComputeShaderUtils::kGolden2DGroupSize);GraphBuilder.AddPass(RDG_EVENT_NAME("ExecuteMySimpleComputeShader"),PassParameters,ERDGPassFlags::AsyncCompute,[&PassParameters, ComputeShader, GroupCount](FRHIComputeCommandList& RHICmdList){FComputeShaderUtils::Dispatch(RHICmdList, ComputeShader, *PassParameters, 
GroupCount);});FRHIGPUBufferReadback* GPUBufferReadback = new FRHIGPUBufferReadback(TEXT("ExecuteMySimpleComputeShaderOutput"));AddEnqueueCopyPass(GraphBuilder, GPUBufferReadback, OutputBuffer, 0u);auto RunnerFunc = [GPUBufferReadback, AsyncCallback](auto&& RunnerFunc) -> void {if (GPUBufferReadback->IsReady()) {int32* Buffer = (int32*)GPUBufferReadback->Lock(1);int OutVal = Buffer[0];GPUBufferReadback->Unlock();AsyncTask(ENamedThreads::GameThread, [AsyncCallback, OutVal]() {AsyncCallback(OutVal);});delete GPUBufferReadback;} else {AsyncTask(ENamedThreads::ActualRenderingThread, [RunnerFunc]() {RunnerFunc(RunnerFunc);});}};AsyncTask(ENamedThreads::ActualRenderingThread, [RunnerFunc]() {RunnerFunc(RunnerFunc);});} else {// We silently exit here as we don't want to crash the game if the shader is not found or has an error.}}GraphBuilder.Execute();
}
MySimpleComputeShader.h
#pragma once

#include "CoreMinimal.h"
#include "GenericPlatform/GenericPlatformMisc.h"
#include "Kismet/BlueprintAsyncActionBase.h"

#include "MySimpleComputeShader.generated.h"

/**
 * CPU-side parameters for one dispatch of the compute shader.
 *
 * X/Y/Z are set by the constructor. Input holds the two integer operands and
 * Output is a slot for a result value; both are zero-initialised so the struct
 * never carries indeterminate values.
 */
struct CS_TEST_API FMySimpleComputeShaderDispatchParams
{
    int X;
    int Y;
    int Z;

    // The two integer operands supplied by the caller.
    int Input[2] = {0, 0};
    // Result slot, zero until something writes it.
    int Output = 0;

    /**
     * @param x Stored in X.
     * @param y Stored in Y.
     * @param z Stored in Z.
     */
    FMySimpleComputeShaderDispatchParams(int x, int y, int z)
        : X(x)
        , Y(y)
        , Z(z)
    {
    }
};

// This is a public interface that we define so outside code can invoke our compute shader.
class CS_TEST_API FMySimpleComputeShaderInterface {
public:// Executes this shader on the render threadstatic void DispatchRenderThread(FRHICommandListImmediate& RHICmdList,FMySimpleComputeShaderDispatchParams Params,TFunction<void(int OutputVal)> AsyncCallback);// Executes this shader on the render thread from the game thread via EnqueueRenderThreadCommandstatic void DispatchGameThread(FMySimpleComputeShaderDispatchParams Params,TFunction<void(int OutputVal)> AsyncCallback){ENQUEUE_RENDER_COMMAND(SceneDrawCompletion)([Params, AsyncCallback](FRHICommandListImmediate& RHICmdList){DispatchRenderThread(RHICmdList, Params, AsyncCallback);});}// Dispatches this shader. Can be called from any threadstatic void Dispatch(FMySimpleComputeShaderDispatchParams Params,TFunction<void(int OutputVal)> AsyncCallback){if (IsInRenderingThread()) {DispatchRenderThread(GetImmediateCommandList_ForRenderCommand(), Params, AsyncCallback);}else{DispatchGameThread(Params, AsyncCallback);}}
};DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnMySimpleComputeShaderLibrary_AsyncExecutionCompleted, const int, Value);UCLASS() // Change the _API to match your project
/**
 * Async Blueprint action that multiplies two integers on the GPU.
 *
 * ExecuteBaseComputeShader() creates the action and stores the two arguments;
 * Activate() dispatches the compute shader and broadcasts `Completed` with the
 * result once the callback fires.
 */
class CS_TEST_API UMySimpleComputeShaderLibrary_AsyncExecution : public UBlueprintAsyncActionBase
{
    GENERATED_BODY()

public:
    // Execute the actual load
    virtual void Activate() override
    {
        // Create a dispatch parameters struct and fill it the input array with our args
        FMySimpleComputeShaderDispatchParams Params(1, 1, 1);
        Params.Input[0] = Arg1;
        Params.Input[1] = Arg2;

        // The callback fires asynchronously, so hold this object weakly instead
        // of capturing a raw `this` that may no longer be alive by then.
        TWeakObjectPtr<UMySimpleComputeShaderLibrary_AsyncExecution> WeakThis(this);

        // Dispatch the compute shader and wait until it completes
        FMySimpleComputeShaderInterface::Dispatch(Params, [WeakThis](int OutputVal)
        {
            if (UMySimpleComputeShaderLibrary_AsyncExecution* Self = WeakThis.Get())
            {
                Self->Completed.Broadcast(OutputVal);
                // The action is finished; mark it so it is no longer kept
                // registered with the game instance.
                Self->SetReadyToDestroy();
            }
        });
    }

    UFUNCTION(BlueprintCallable, meta = (BlueprintInternalUseOnly = "true", Category = "ComputeShader", WorldContext = "WorldContextObject"))
    static UMySimpleComputeShaderLibrary_AsyncExecution* ExecuteBaseComputeShader(UObject* WorldContextObject, int Arg1, int Arg2)
    {
        UMySimpleComputeShaderLibrary_AsyncExecution* Action = NewObject<UMySimpleComputeShaderLibrary_AsyncExecution>();
        Action->Arg1 = Arg1;
        Action->Arg2 = Arg2;
        Action->RegisterWithGameInstance(WorldContextObject);

        return Action;
    }

    // Broadcast with the shader's output value when the dispatch completes.
    UPROPERTY(BlueprintAssignable)
    FOnMySimpleComputeShaderLibrary_AsyncExecutionCompleted Completed;

    // Operands copied into the dispatch parameters by Activate().
    int Arg1 = 0;
    int Arg2 = 0;
};
6. 著色器實現
MySimpleComputeShader.usf
#include "/Engine/Public/Platform.ush"

// Read-only input buffer and writable output buffer bound from the C++ side.
Buffer<int> Input;
RWBuffer<int> Output;

// Thread-group size comes from the THREADS_X / THREADS_Y / THREADS_Z defines.
[numthreads(THREADS_X, THREADS_Y, THREADS_Z)]
void MySimpleComputeShader(
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint GroupIndex : SV_GroupIndex)
{
    // Outputs one number
    Output[0] = Input[0] * Input[1];
}
7. 分配 work groups
關於完整的解釋,請參考:
https://learnopengl.com/Guest-Articles/2022/Compute-Shaders/Introduction
[numthreads(THREADS_X, THREADS_Y, THREADS_Z)]
是在HLSL中分配計算空間的語法