ue4 4.21 shader notes(1)

写在前面
经过大概三天的折磨,我希望从一个简单的切入点学习渲染的想法泡汤了。没有银弹,还是老老实实看ue4源码好了。

正文
4.21/Engine/Shaders/Public/FP16Math.ush

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/** Selects the types behind the EXP_FLT* aliases: half, min16float, or float. */

/*=============================================================================
FP16Math.usf: Defines for using FP16 or FP32
=============================================================================*/
#pragma once

// The experimental FP16 path is on by default; it then chooses between half and min16float.
#ifndef EXPERIMENTAL_FP16
#define EXPERIMENTAL_FP16 1
#endif

// min16float is off by default, so the half family is used unless this is overridden.
#ifndef USE_MIN16FLOAT
#define USE_MIN16FLOAT 0
#endif

#if EXPERIMENTAL_FP16
#if USE_MIN16FLOAT
// USE_MIN16FLOAT enabled: alias to the min16float family.
#define EXP_FLT min16float
#define EXP_FLT2 min16float2
#define EXP_FLT3 min16float3
#define EXP_FLT4 min16float4
#define EXP_FLT3x3 min16float3x3
#define EXP_FLT4x4 min16float4x4
#define EXP_FLT4x3 min16float4x3
#else
// Otherwise alias to the half family.
#define EXP_FLT half
#define EXP_FLT2 half2
#define EXP_FLT3 half3
#define EXP_FLT4 half4
#define EXP_FLT3x3 half3x3
#define EXP_FLT4x4 half4x4
#define EXP_FLT4x3 half4x3
#endif
#else
// EXPERIMENTAL_FP16 disabled: alias to the float family.
#define EXP_FLT float
#define EXP_FLT2 float2
#define EXP_FLT3 float3
#define EXP_FLT4 float4
#define EXP_FLT3x3 float3x3
#define EXP_FLT4x4 float4x4
#define EXP_FLT4x3 float4x3
#endif

4.21/Engine/Shaders/Public/Platform.ush

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
/** Enables the platform-specific macros and adds a few small helper functions. */

/*=============================================================================
Prefix.usf: USF file automatically included by shader preprocessor.
=============================================================================*/

#pragma once

#include "FP16Math.ush"

// ---------------------------------------------------- Profile or compiler specific includes
// TODO: Have shader compiler including these platform specific USF files, that needs to work
// with ShaderCore.cpp's GetShaderIncludes().
// Pull in the header that matches the active platform / compiler.
#if PS4_PROFILE // PS4 platform
// always #include PS4Common.usf so it can #define override anything in any other included file.
#include "Platform/PS4/PS4Common.ush"
#endif

#if XBOXONE_PROFILE // Xbox One platform
#include "Platform/XboxOne/XboxOneCommon.ush"
#endif

#if SWITCH_PROFILE || SWITCH_PROFILE_FORWARD // Switch platform
#include "Platform/Switch/SwitchCommon.ush"
#endif

#if COMPILER_METAL // Metal backend
// Helps with iteration when changing Metal shader code generation backend.
#include "Platform/Metal/MetalCommon.ush"
#endif

#if COMPILER_VULKAN // Vulkan backend
// Helps with iteration when changing Vulkan shader code generation backend.
#include "Platform/Vulkan/VulkanCommon.ush"
#endif


// ---------------------------------------------------- DDC invalidation
// to support the console command "r.InvalidateShaderCache"
// NOTE(review): ShaderVersion.ush's own header names the command r.InvalidateCachedShaders - confirm which spelling is current.
#include "ShaderVersion.ush"


// ---------------------------------------------------- COMPILE_* and *_PROFILE defaults
// Every macro below defaults to 0 unless it was already defined before this point.
// Shader compiler selection macros.
#ifndef COMPILER_HLSLCC // hlslcc
#define COMPILER_HLSLCC 0
#endif

#ifndef COMPILER_HLSL // HLSL
#define COMPILER_HLSL 0
#endif

#ifndef COMPILER_GLSL // GLSL
#define COMPILER_GLSL 0
#endif

#ifndef COMPILER_GLSL_ES2 // GLSL ES2
#define COMPILER_GLSL_ES2 0
#endif

#ifndef COMPILER_GLSL_ES3_1 // GLSL ES3.1
#define COMPILER_GLSL_ES3_1 0
#endif

#ifndef COMPILER_GLSL_ES3_1_EXT // GLSL ES3.1 EXT
#define COMPILER_GLSL_ES3_1_EXT 0
#endif

#ifndef COMPILER_METAL // Metal
#define COMPILER_METAL 0
#endif
// Compiler / platform feature and profile macros.
#ifndef COMPILER_SUPPORTS_ATTRIBUTES
#define COMPILER_SUPPORTS_ATTRIBUTES 0
#endif

#ifndef PLATFORM_SUPPORTS_SRV_UB
#define PLATFORM_SUPPORTS_SRV_UB 0
#endif

#ifndef SM5_PROFILE
#define SM5_PROFILE 0
#endif

#ifndef SM4_PROFILE
#define SM4_PROFILE 0
#endif

#ifndef OPENGL_PROFILE
#define OPENGL_PROFILE 0
#endif

#ifndef ES2_PROFILE
#define ES2_PROFILE 0
#endif

#ifndef ES3_1_PROFILE
#define ES3_1_PROFILE 0
#endif

#ifndef METAL_PROFILE
#define METAL_PROFILE 0
#endif

#ifndef METAL_MRT_PROFILE
#define METAL_MRT_PROFILE 0
#endif

#ifndef METAL_SM5_NOTESS_PROFILE
#define METAL_SM5_NOTESS_PROFILE 0
#endif

#ifndef METAL_SM5_PROFILE
#define METAL_SM5_PROFILE 0
#endif

#ifndef COMPILER_VULKAN
#define COMPILER_VULKAN 0
#endif

#ifndef VULKAN_PROFILE
#define VULKAN_PROFILE 0
#endif

#ifndef VULKAN_PROFILE_SM4
#define VULKAN_PROFILE_SM4 0
#endif

#ifndef VULKAN_PROFILE_SM5
#define VULKAN_PROFILE_SM5 0
#endif

#ifndef IOS
#define IOS 0
#endif

#ifndef MAC
#define MAC 0
#endif
// Use the macros defined above to validate the configuration and enable platform features.
// 'static' asserts
#if COMPILER_GLSL || COMPILER_GLSL_ES2 || COMPILER_GLSL_ES3_1 || COMPILER_VULKAN || COMPILER_METAL
// All of the compilers listed above must also have COMPILER_HLSLCC set.
#if !COMPILER_HLSLCC
#error "Missing COMPILER_HLSLCC define!"
#endif
#endif


// PLATFORM_SUPPORTS_SRV_UB_MACRO(...) passes its (variadic) arguments through when
// PLATFORM_SUPPORTS_SRV_UB is set, and expands to nothing otherwise.
#if PLATFORM_SUPPORTS_SRV_UB
#define PLATFORM_SUPPORTS_SRV_UB_MACRO(...) __VA_ARGS__
#else
#define PLATFORM_SUPPORTS_SRV_UB_MACRO(...)
#endif

// ---------------------------------------------------- Alternative floating point types

#ifndef FORCE_FLOATS
#define FORCE_FLOATS 0
#endif
// half* and fixed* are remapped to float* on every configuration EXCEPT the GLSL ES2 / ES3.1
// compilers and METAL_PROFILE; FORCE_FLOATS forces the remap even on those.
#if (!(COMPILER_GLSL_ES2 || COMPILER_GLSL_ES3_1 || METAL_PROFILE) || FORCE_FLOATS)
// Always use floats on the remaining platforms, because low precision modifiers are currently only tweaked
// for the excluded ones, and we don't want potential side effects on other platforms
#define half float
#define half1 float1
#define half2 float2
#define half3 float3
#define half4 float4
#define half3x3 float3x3
#define half4x4 float4x4
#define half4x3 float4x3
#define fixed float
#define fixed1 float1
#define fixed2 float2
#define fixed3 float3
#define fixed4 float4
#define fixed3x3 float3x3
#define fixed4x4 float4x4
#define fixed4x3 float4x3
#endif

// ---------------------------------------------------- Profile config
// Defines the feature (shader model) levels and picks FEATURE_LEVEL from the active profile.
// The #if/#elif chain below is order-sensitive: the first matching profile wins.
// Values of FEATURE_LEVEL.
#define FEATURE_LEVEL_ES2 1
#define FEATURE_LEVEL_ES3_1 2
#define FEATURE_LEVEL_SM3 3
#define FEATURE_LEVEL_SM4 4
#define FEATURE_LEVEL_SM5 5
#define FEATURE_LEVEL_MAX 6

#if PS4_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_SM5

#elif SM5_PROFILE
// SM5 = full dx11 features (high end UE4 rendering)
#define FEATURE_LEVEL FEATURE_LEVEL_SM5

#elif SM4_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_SM4

#elif SWITCH_PROFILE || SWITCH_PROFILE_FORWARD
#undef ES3_1_PROFILE

#if SWITCH_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
#else
#define FEATURE_LEVEL FEATURE_LEVEL_ES3_1
// @todo switch: maybe all uses of this should check feature level not profile?
#define ES3_1_PROFILE 1
#endif

#elif VULKAN_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_ES3_1

#elif VULKAN_PROFILE_SM4
#define FEATURE_LEVEL FEATURE_LEVEL_SM4

#elif VULKAN_PROFILE_SM5
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
// Stencil is read from the x component here.
// (Swizzle usage example: v.xxyz)
#define STENCIL_COMPONENT_SWIZZLE .x


#elif GL3_PROFILE || GL4_PROFILE
#if GL3_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_SM4
#elif GL4_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
#endif

// A8 textures when sampled have their component in R.
#define A8_SAMPLE_MASK .r

// hacks until the shader compiler supports those
#if GL4_PROFILE
// Treat the class keyword as struct.
#define class struct
#endif

#elif METAL_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_ES3_1
// @todo metal: remove this and make sure all uses handle METAL_PROFILE
#undef ES3_1_PROFILE
#define ES3_1_PROFILE 1
#define FCOLOR_COMPONENT_SWIZZLE .bgra
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
#define STENCIL_COMPONENT_SWIZZLE .x

#elif METAL_ES2_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_ES2
// @todo metal: remove this and make sure all uses handle METAL_ES2_PROFILE
#undef ES2_PROFILE
#define ES2_PROFILE 1
#define FCOLOR_COMPONENT_SWIZZLE .bgra
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
#define STENCIL_COMPONENT_SWIZZLE .x

#elif METAL_MRT_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
#define FCOLOR_COMPONENT_SWIZZLE .bgra
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
#define STENCIL_COMPONENT_SWIZZLE .x

#elif METAL_SM5_NOTESS_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
#define FCOLOR_COMPONENT_SWIZZLE .bgra
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
#define STENCIL_COMPONENT_SWIZZLE .x

#elif METAL_SM5_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_SM5
#define FCOLOR_COMPONENT_SWIZZLE .bgra
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
#define STENCIL_COMPONENT_SWIZZLE .x

#elif ES2_PROFILE || ES3_1_PROFILE
#if ES3_1_PROFILE
#define FEATURE_LEVEL FEATURE_LEVEL_ES3_1
#else
//@todo ES3_1 GL
#define FEATURE_LEVEL FEATURE_LEVEL_ES2
#endif

#if COMPILER_GLSL_ES2 || COMPILER_GLSL_ES3_1
// Swizzle as we only support GL_BGRA on non-ES2 platforms that have that extension
#define FCOLOR_COMPONENT_SWIZZLE .bgra
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
#else
#define FCOLOR_COMPONENT_SWIZZLE .rgba
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
#if COMPILER_GLSL
// A8 textures when sampled have their component in R
#define A8_SAMPLE_MASK .r
#endif
#endif

#else

// No known profile matched: fail the compile. FEATURE_LEVEL is still defined afterwards.
#error Add your platform here

#define FEATURE_LEVEL FEATURE_LEVEL_MAX

#endif


// ---------------------------------------------------- Swizzle defaults

// If the profile section did not request a color component swizzle, fall back to .rgba
#ifndef FCOLOR_COMPONENT_SWIZZLE
#define FCOLOR_COMPONENT_SWIZZLE .rgba
#endif

// Manual-fetch color swizzle defaults to .bgra
#ifndef FMANUALFETCH_COLOR_COMPONENT_SWIZZLE
#define FMANUALFETCH_COLOR_COMPONENT_SWIZZLE .bgra
#endif

// Stencil swizzle defaults to .g unless a profile set it
#ifndef STENCIL_COMPONENT_SWIZZLE
#define STENCIL_COMPONENT_SWIZZLE .g
#endif

// A8 sample mask defaults to .a unless a profile set it
#ifndef A8_SAMPLE_MASK
#define A8_SAMPLE_MASK .a
#endif

// ---------------------------------------------------- Platform dependent supports

// non-editor platforms generally never want development/editor features.
// NOTE(review): ESDEFERRED_PROFILE gets no default in this file; an undefined macro evaluates to 0 inside #if.
#ifndef PLATFORM_SUPPORTS_DEVELOPMENT_SHADERS
#define PLATFORM_SUPPORTS_DEVELOPMENT_SHADERS !ESDEFERRED_PROFILE
#endif

// True for ES2 / ES3.1 feature levels when not compiled by the GLSL ES compilers, with development
// shaders enabled, excluding METAL_PROFILE unless MAC is set, and excluding VULKAN_PROFILE.
// USE_DEVELOPMENT_SHADERS is #defined further down in this file; that is fine because the macro
// body is only expanded where MOBILE_EMULATION is used.
#ifndef MOBILE_EMULATION
#define MOBILE_EMULATION ((FEATURE_LEVEL == FEATURE_LEVEL_ES2 || FEATURE_LEVEL == FEATURE_LEVEL_ES3_1) && (!(COMPILER_GLSL_ES3_1 || COMPILER_GLSL_ES2) && USE_DEVELOPMENT_SHADERS && (!(METAL_PROFILE) || MAC) && !VULKAN_PROFILE))
#endif

// Whether the platform supports independent texture and samplers
// When enabled, different texture lookups can share samplers to allow more artist samplers in the base pass
// Ideally this would just be enabled for all SM4 and above feature level platforms
// @todo metal mrt: No reason this can't work with Metal, once cross compiler is fixed
#ifndef SUPPORTS_INDEPENDENT_SAMPLERS
#define SUPPORTS_INDEPENDENT_SAMPLERS (PS4_PROFILE || SM5_PROFILE || SM4_PROFILE || METAL_MRT_PROFILE || METAL_SM5_NOTESS_PROFILE || METAL_SM5_PROFILE || VULKAN_PROFILE_SM5 || VULKAN_PROFILE_SM4 || VULKAN_PROFILE)
#endif

// Whether the platform supports a global clip plane through SV_ClipDistance
// Ideally this would just be enabled for all SM4 and above feature level platforms, but not tested everywhere yet
#define PLATFORM_SUPPORTS_GLOBAL_CLIP_PLANE (PS4_PROFILE || SM5_PROFILE || SM4_PROFILE || METAL_PROFILE || METAL_MRT_PROFILE || METAL_SM5_NOTESS_PROFILE || METAL_SM5_PROFILE || GL4_PROFILE || GL3_PROFILE || VULKAN_PROFILE_SM4 || VULKAN_PROFILE_SM5)

// Whether the platform support pixel coverage on MSAA targets (SV_Coverage).
#define SUPPORTS_PIXEL_COVERAGE (FEATURE_LEVEL >= FEATURE_LEVEL_SM5 && !COMPILER_GLSL && !MOBILE_EMULATION)

// Must match C++ RHISupports4ComponentUAVReadWrite
// D3D11 does not support multi-component loads from a UAV: "error X3676: typed UAV loads are only allowed for single-component 32-bit element types"
#define PLATFORM_SUPPORTS_4COMPONENT_UAV_READ_WRITE (PS4_PROFILE || XBOXONE_PROFILE || COMPILER_METAL)


// ---------------------------------------------------- Compiler specific defaults and fallbacks
// Each capability macro below defaults to 0 unless it was already defined before this point.

/** Defined only for Metal's combined Vertex + Hull shader */
#ifndef TESSELLATIONSHADER
#define TESSELLATIONSHADER 0
#endif

// Hlslcc platforms ignore the uniform keyword as it can't properly optimize flow
// (the keyword is removed by defining it to nothing).
#if COMPILER_HLSLCC
#define uniform
#endif

// Whether the compiler exposes lane management in a wave:
// WaveGetLaneCount()
// WaveGetLaneIndex()
// if (WaveOnce()) { ... }
#ifndef COMPILER_SUPPORTS_WAVE_ONCE
#define COMPILER_SUPPORTS_WAVE_ONCE 0
#endif

// Whether the compiler exposes voting on all lanes:
// WaveAnyTrue(MyBool)
// WaveAnyTrue(MyBool)
// WaveAllEqual(MyBool)
// NOTE(review): WaveAnyTrue is listed twice above; the second entry is presumably WaveAllTrue(MyBool) - confirm.
#ifndef COMPILER_SUPPORTS_WAVE_VOTE
#define COMPILER_SUPPORTS_WAVE_VOTE 0
#endif

// Whether the compiler exposes min max instructions across all lane of the wave.
// WaveAllMin(MyFloat)
// WaveAllMin(MyInt)
// WaveAllMin(MyUint)
// WaveAllMax(MyFloat)
// WaveAllMax(MyInt)
// WaveAllMax(MyUint)
#ifndef COMPILER_SUPPORTS_WAVE_MINMAX
#define COMPILER_SUPPORTS_WAVE_MINMAX 0
#endif

// Whether the compiler exposes OR and AND bit operation all lanes:
// WaveAllBitAnd(MyMask)
// WaveAllBitOr(MyMask)
#ifndef COMPILER_SUPPORTS_WAVE_BIT_ORAND
#define COMPILER_SUPPORTS_WAVE_BIT_ORAND 0
#endif

// Whether the compiler exposes GCN's ds_swizzle_b32 instruction.
// float WaveLaneSwizzleGCN(float x, const uint and_mask, const uint or_mask, const uint xor_mask)
#ifndef COMPILER_SUPPORTS_WAVE_SWIZZLE_GCN
#define COMPILER_SUPPORTS_WAVE_SWIZZLE_GCN 0
#endif

// Mirrors GRHISupportsRectTopology.
#ifndef PLATFORM_SUPPORTS_RECT_LIST
#define PLATFORM_SUPPORTS_RECT_LIST 0
#endif


// ---------------------------------------------------- Compiler attributes
// The bracketed attributes are only emitted for SM5_PROFILE or when COMPILER_SUPPORTS_ATTRIBUTES is set.

#if SM5_PROFILE || COMPILER_SUPPORTS_ATTRIBUTES

/** Avoids flow control constructs. */
#define UNROLL [unroll]

/** Gives preference to flow control constructs. */
#define LOOP [loop]

/** Performs branching by using control flow instructions like jmp and label. */
#define BRANCH [branch]

/** Performs branching by using the cnd instructions. */
#define FLATTEN [flatten]

/** Allows a compute shader loop termination condition to be based off of a UAV read. The loop must not contain synchronization intrinsics. */
#define ALLOW_UAV_CONDITION [allow_uav_condition]

#endif // SM5_PROFILE || COMPILER_SUPPORTS_ATTRIBUTES

// [earlydepthstencil] is emitted for SM5 and the Metal MRT / SM5 profiles only.
#if SM5_PROFILE || METAL_MRT_PROFILE || METAL_SM5_PROFILE || METAL_SM5_NOTESS_PROFILE
#define EARLYDEPTHSTENCIL [earlydepthstencil]
#endif


// ---------------------------------------------------- Compiler attribute fallbacks
// Any attribute macro that was not defined above (i.e. is unsupported) expands to nothing here.
#ifndef UNROLL
#define UNROLL
#endif

#ifndef LOOP
#define LOOP
#endif

#ifndef BRANCH
#define BRANCH
#endif

#ifndef FLATTEN
#define FLATTEN
#endif

#ifndef ALLOW_UAV_CONDITION
#define ALLOW_UAV_CONDITION
#endif

#ifndef INVARIANT
#define INVARIANT
#endif

#ifndef ENABLE_RE_Z
#define ENABLE_RE_Z
#endif

#ifndef EARLYDEPTHSTENCIL
#define EARLYDEPTHSTENCIL
#endif

#ifndef STRONG_TYPE
#define STRONG_TYPE
#endif

// Unlike the macros above, StrongTypedBuffer falls back to the plain Buffer type rather than to nothing.
#ifndef StrongTypedBuffer
#define StrongTypedBuffer Buffer
#endif

// ---------------------------------------------------- Interpolator attribute fallbacks
// Each COMPRESSED_* qualifier expands to nothing unless it was already defined before this point.

#ifndef COMPRESSED_16_FLOAT
#define COMPRESSED_16_FLOAT
#endif

#ifndef COMPRESSED_16_UNORM
#define COMPRESSED_16_UNORM
#endif

#ifndef COMPRESSED_16_SNORM
#define COMPRESSED_16_SNORM
#endif

#ifndef COMPRESSED_16_UINT
#define COMPRESSED_16_UINT
#endif

#ifndef COMPRESSED_16_INT
#define COMPRESSED_16_INT
#endif

#ifndef COMPRESSED_8_UNORM
#define COMPRESSED_8_UNORM
#endif

#ifndef COMPRESSED_8_SNORM
#define COMPRESSED_8_SNORM
#endif

#ifndef COMPRESSED_8_UINT
#define COMPRESSED_8_UINT
#endif


// ---------------------------------------------------- Global uses

// Development shaders are used only when both the compile-time request and the platform support flag are set.
#define USE_DEVELOPMENT_SHADERS (COMPILE_SHADERS_FOR_DEVELOPMENT && PLATFORM_SUPPORTS_DEVELOPMENT_SHADERS)


// ---------------------------------------------------- Standard sizes of the indirect parameter structs
// Sizes are expressed as a number of uints, per the sizeof formulas below.

// sizeof(FRHIDispatchIndirectParameters) / sizeof(uint)
#define DISPATCH_INDIRECT_UINT_COUNT 3

// sizeof(FRHIDrawIndirectParameters) / sizeof(uint)
#define DRAW_INDIRECT_UINT_COUNT 4

// sizeof(FRHIDrawIndexedIndirectParameters) / sizeof(uint)
#define DRAW_INDEXED_INDIRECT_UINT_COUNT 5


// ---------------------------------------------------- Compiler missing implementations
// Supply determinant() for the compilers selected by the #if below, which lack it.
#if COMPILER_GLSL_ES2 || (COMPILER_METAL && MAX_SHADER_LANGUAGE_VERSION < 2) || COMPILER_SWITCH

/** Returns the determinant of the 3x3 matrix M, expanded over M[0][0], M[1][0] and M[2][0]. */
float determinant(float3x3 M)
{
	// 2x2 sub-determinants paired with M[0][0], M[1][0] and M[2][0] respectively.
	const float Minor0 = M[1][1] * M[2][2] - M[1][2] * M[2][1];
	const float Minor1 = M[0][1] * M[2][2] - M[0][2] * M[2][1];
	const float Minor2 = M[0][1] * M[1][2] - M[0][2] * M[1][1];

	// Alternating signs: + - +
	return M[0][0] * Minor0 - M[1][0] * Minor1 + M[2][0] * Minor2;
}

#endif

// Provide log10(x) on hlslcc platforms as log(x) / log(10.0).
// The whole expansion is parenthesized so the macro composes correctly inside larger
// expressions: without the outer parentheses, 1.0 / log10(x) would expand to
// 1.0 / log(x) / log(10.0).
#if COMPILER_HLSLCC
#define log10(x) (log((x)) / log(10.0))
#endif


#if !COMPILER_SUPPORTS_MINMAX3 // Provide min3()/max3() when the compiler does not supply them

/** Returns the smallest of the three scalars a, b and c. */
float min3( float a, float b, float c )
{
	const float MinBC = min( b, c );
	return min( a, MinBC );
}

/** Returns the largest of the three scalars a, b and c. */
float max3( float a, float b, float c )
{
	const float MaxBC = max( b, c );
	return max( a, MaxBC );
}

/** Component-wise minimum of three float2 values. */
float2 min3( float2 a, float2 b, float2 c )
{
	// min() on vectors operates per component, matching the per-channel scalar form.
	return min( a, min( b, c ) );
}

/** Component-wise maximum of three float2 values. */
float2 max3( float2 a, float2 b, float2 c )
{
	// max() on vectors operates per component, matching the per-channel scalar form.
	return max( a, max( b, c ) );
}

/** Component-wise maximum of three float3 values. */
float3 max3( float3 a, float3 b, float3 c )
{
	// max() on vectors operates per component, matching the per-channel scalar form.
	return max( a, max( b, c ) );
}

/** Component-wise minimum of three float3 values. */
float3 min3( float3 a, float3 b, float3 c )
{
	// min() on vectors operates per component, matching the per-channel scalar form.
	return min( a, min( b, c ) );
}

/** Component-wise minimum of three float4 values. */
float4 min3( float4 a, float4 b, float4 c )
{
	// min() on vectors operates per component, matching the per-channel scalar form.
	return min( a, min( b, c ) );
}

/** Component-wise maximum of three float4 values. */
float4 max3( float4 a, float4 b, float4 c )
{
	// max() on vectors operates per component, matching the per-channel scalar form.
	return max( a, max( b, c ) );
}

#endif


// On hlslcc platforms the fine-grained derivative intrinsics are mapped to the regular ddx/ddy.
#if COMPILER_HLSLCC
#define ddx_fine(x) ddx(x)
#define ddy_fine(y) ddy(y)
#endif

// Give hint to compiler to move one value to scalar unit.
// Fallback: pass the value through unchanged when neither ToScalarMemory nor
// COMPILER_SUPPORTS_TO_SCALAR_MEMORY is defined.
#if !defined(ToScalarMemory) && !defined(COMPILER_SUPPORTS_TO_SCALAR_MEMORY)
#define ToScalarMemory(x) (x)
#endif

#if FEATURE_LEVEL < FEATURE_LEVEL_ES3_1 && !COMPILER_METAL
// DX11 (feature levels >= 10) feature sets natively supports uints in shaders.
// Below the ES3.1 feature level (and not on Metal) uint4 is remapped to int4.
#define uint4 int4
#endif

// SNORM / UNORM expand to nothing on hlslcc and PS4, and to the snorm / unorm keywords elsewhere.
#if COMPILER_HLSLCC || PS4_PROFILE
#define SNORM
#define UNORM
#else
#define SNORM snorm
#define UNORM unorm
#endif

4.21/Engine/Shaders/Public/ShaderVersion.ush

1
2
3
4
5
6
/** Shader version GUID: regenerating it invalidates the shader DDC (see the generated header comment below). */
// This file is automatically generated by the console command r.InvalidateCachedShaders
// Each time the console command is executed it generates a new GUID. As this file is included
// in Platform.ush (which should be included in any shader) it allows to invalidate the shader DDC.
//
// GUID = F7C9D61CA78A4854B1D6EE5D3F987298

4.21/Engine/Shaders/Public/WaveBroadcastIntrinsics.ush
Wave-Intrinsics

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/** Blog author's note: not fully understood; MSDN says wave intrinsics can improve performance across threads. */
/*=============================================================================
WaveBroadcastIntrinsics.ush: Exposes intrinsics to perform broadcasting
within lanes of a same wave.
=============================================================================*/

#pragma once

#include "Platform.ush"


// Wave broadcast is available exactly when the compiler exposes the GCN swizzle intrinsic.
#define PLATFORM_SUPPORTS_WAVE_BROADCAST (COMPILER_SUPPORTS_WAVE_SWIZZLE_GCN)

#if PLATFORM_SUPPORTS_WAVE_BROADCAST

/** Swaps the left half of each lane group with its right half
 * (LaneGroupSize is a power of two in [2; 64]).
 *
 * If a lane is not active, the VGPR value returned is 0.
 *
 * Example with LaneGroupSize = 8:
 *
 *        |     lane group (size=8)     |
 * x =    | 0  1  2  3| 4  5  6  7| 8  9 ...
 *
 * return | 4  5  6  7| 0  1  2  3|12 13 ...
 *
 * @param x             Value held by the current lane.
 * @param LaneGroupSize Number of lanes per group.
 * @return The value of x read through WaveLaneSwizzleGCN with the masks built below.
 */
float WaveSwapWithinLaneGroup(float x, const uint LaneGroupSize)
{
	// Only the xor mask varies: half the group size.
	// NOTE(review): presumably the swizzle reads from lane ((lane & and_mask) | or_mask) ^ xor_mask - confirm against the platform intrinsic.
	const uint HalfGroupXorMask = LaneGroupSize >> 1;
	const uint PassThroughAndMask = 0x1F;
	const uint EmptyOrMask = 0x00;

	return WaveLaneSwizzleGCN(x, PassThroughAndMask, EmptyOrMask, HalfGroupXorMask);
}

/** Broadcasts one inner lane group to every inner group of its enclosing lane group
 * (sizes are powers of two in [2; 64]).
 *
 * If a lane is not active, the VGPR value returned is 0.
 *
 * Example with LaneGroupSize = 8, InnerLaneGroupSize = 2, InnerLaneGroupId = 1:
 *
 *        |     lane group (size=8)     |
 * x =    | 0  1  2  3  4  5  6  7| 8  9 ...
 *
 * return | 2  3  2  3  2  3  2  3|10 11 ...
 *
 * @param x                  Value held by the current lane.
 * @param LaneGroupSize      Number of lanes per outer group.
 * @param InnerLaneGroupSize Number of lanes per inner group.
 * @param InnerLaneGroupId   Index of the inner group whose values are broadcast.
 * @return The value of x read through WaveLaneSwizzleGCN with the masks built below.
 */
float WaveBroadcastLaneGroup(float x, const uint LaneGroupSize, const uint InnerLaneGroupSize, const uint InnerLaneGroupId)
{
	// Number of inner groups that fit in one lane group (integer division).
	const uint GroupsPerLaneGroup = LaneGroupSize / InnerLaneGroupSize;

	// The and mask has the inner-group selector bits cleared; the or mask holds the
	// first lane offset of the requested inner group.
	const uint SelectorBits = (GroupsPerLaneGroup - 1) * InnerLaneGroupSize;
	const uint ClearSelectorAndMask = ~SelectorBits;
	const uint PickGroupOrMask = InnerLaneGroupId * InnerLaneGroupSize;
	const uint EmptyXorMask = 0x00;

	return WaveLaneSwizzleGCN(x, ClearSelectorAndMask, PickGroupOrMask, EmptyXorMask);
}


// Generates Type overloads of WaveSwapWithinLaneGroup and WaveBroadcastLaneGroup.
// Each overload reinterprets x with asfloat(), calls the float version, and converts the
// result back with as<Type>() (asint / asuint for the instantiations below).
#define __WaveBroadcastOverrideType(Type) \
Type WaveSwapWithinLaneGroup(Type x, const uint LaneGroupSize) \
{ \
return as##Type(WaveSwapWithinLaneGroup(asfloat(x), LaneGroupSize)); \
} \
Type WaveBroadcastLaneGroup(Type x, const uint LaneGroupSize, const uint InnerLaneGroupSize, const uint InnerLaneGroupId) \
{ \
return as##Type(WaveBroadcastLaneGroup(asfloat(x), LaneGroupSize, InnerLaneGroupSize, InnerLaneGroupId)); \
} \


// Instantiate the overloads for int and uint.
__WaveBroadcastOverrideType(int)
__WaveBroadcastOverrideType(uint)


// The generator macro is only needed for the instantiations above.
#undef __WaveBroadcastOverrideType

#endif // PLATFORM_SUPPORTS_WAVE_BROADCAST