// Source path: ArabDesert/Assets/Editor/x64/Bakery/dilateFloat.ptx
//
// File-viewer metadata: 285 lines, 11 KiB, plaintext.

//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-23083092
// Cuda compilation tools, release 9.1, V9.1.85
// Based on LLVM 3.4svn
//
// Module header: PTX ISA 6.1, minimum target sm_30, 64-bit addressing.
.version 6.1
.target sm_30
.address_size 64
// .globl _Z6oxMainv

// ---------------------------------------------------------------------------
// Program variables. Each is a raw global whose type name / semantic strings
// are spelled out in the companion tables further down. Of these, the entry
// point in this file references only pixelID, resolution and image; the others
// are declared but not read by the code visible here.
// ---------------------------------------------------------------------------
.global .align 8 .b8 pixelID[8];
.global .align 8 .b8 resolution[8];
.global .align 4 .b8 normal[12];
.global .align 4 .b8 camPos[12];
.global .align 4 .b8 root[4];
.global .align 4 .u32 imageEnabled;
.global .texref lightmap;
.global .align 16 .b8 tileInfo[16];
.global .align 4 .u32 additive;
// 1-byte placeholder: the entry only takes this symbol's address and passes it
// to _rt_buffer_get_64; it never loads from `image` directly.
.global .align 1 .b8 image[1];

// ---------------------------------------------------------------------------
// Per-variable type info records (8 bytes each). Bytes 0-3 are the ASCII
// string "Ray\0" (82, 97, 121, 0); bytes 4-7 form a little-endian 32-bit value
// that equals the declared byte size of the matching variable above
// (8, 8, 12, 12, 4, 4, 16, 4).
// NOTE(review): presumably consumed by the host runtime for variable binding;
// nothing in this file reads them.
// ---------------------------------------------------------------------------
.global .align 4 .b8 _ZN21rti_internal_typeinfo7pixelIDE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo10resolutionE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo6normalE[8] = {82, 97, 121, 0, 12, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo6camPosE[8] = {82, 97, 121, 0, 12, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo4rootE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo12imageEnabledE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo8tileInfoE[8] = {82, 97, 121, 0, 16, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo8additiveE[8] = {82, 97, 121, 0, 4, 0, 0, 0};

// ---------------------------------------------------------------------------
// Internal "register" globals: one bitness detector, ten 64-bit and ten 32-bit
// exception-detail slots, and a three-component ray index. None has an
// initialiser and none is referenced by the entry point in this file.
// ---------------------------------------------------------------------------
.global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;

// ---------------------------------------------------------------------------
// Type names as NUL-terminated ASCII:
//   pixelID, resolution -> "uint2"      normal, camPos -> "float3"
//   root                -> "rtObject"   imageEnabled, additive -> "int"
//   tileInfo            -> "uint4"
// ---------------------------------------------------------------------------
.global .align 8 .b8 _ZN21rti_internal_typename7pixelIDE[6] = {117, 105, 110, 116, 50, 0};
.global .align 8 .b8 _ZN21rti_internal_typename10resolutionE[6] = {117, 105, 110, 116, 50, 0};
.global .align 8 .b8 _ZN21rti_internal_typename6normalE[7] = {102, 108, 111, 97, 116, 51, 0};
.global .align 8 .b8 _ZN21rti_internal_typename6camPosE[7] = {102, 108, 111, 97, 116, 51, 0};
.global .align 16 .b8 _ZN21rti_internal_typename4rootE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0};
.global .align 4 .b8 _ZN21rti_internal_typename12imageEnabledE[4] = {105, 110, 116, 0};
.global .align 8 .b8 _ZN21rti_internal_typename8tileInfoE[6] = {117, 105, 110, 116, 52, 0};
.global .align 4 .b8 _ZN21rti_internal_typename8additiveE[4] = {105, 110, 116, 0};

// Type enum tags: every variable carries the same value, 4919 (0x1337).
.global .align 4 .u32 _ZN21rti_internal_typeenum7pixelIDE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum10resolutionE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum6normalE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum6camPosE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum4rootE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum12imageEnabledE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum8tileInfoE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum8additiveE = 4919;

// ---------------------------------------------------------------------------
// Semantic strings as NUL-terminated ASCII:
//   pixelID    -> "rtLaunchIndex"
//   resolution -> "rtLaunchDim"
//   normal     -> "attribute normal"
// The remaining variables have a 1-byte array with no initialiser
// (presumably an empty string, i.e. no semantic -- confirm against the runtime).
// ---------------------------------------------------------------------------
.global .align 16 .b8 _ZN21rti_internal_semantic7pixelIDE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0};
.global .align 16 .b8 _ZN21rti_internal_semantic10resolutionE[12] = {114, 116, 76, 97, 117, 110, 99, 104, 68, 105, 109, 0};
.global .align 16 .b8 _ZN21rti_internal_semantic6normalE[17] = {97, 116, 116, 114, 105, 98, 117, 116, 101, 32, 110, 111, 114, 109, 97, 108, 0};
.global .align 1 .b8 _ZN21rti_internal_semantic6camPosE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic4rootE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic12imageEnabledE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic8tileInfoE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic8additiveE[1];

// Annotation strings: a 1-byte array with no initialiser for every variable.
.global .align 1 .b8 _ZN23rti_internal_annotation7pixelIDE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation10resolutionE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation6normalE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation6camPosE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation4rootE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation12imageEnabledE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation8tileInfoE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation8additiveE[1];
// ---------------------------------------------------------------------------
// _Z6oxMainv -- entry point `oxMain()`, no parameters.
//
// One-texel dilation step over the `image` buffer, executed per pixelID:
//   1. Fetch the element at pixelID. If its 4th float (byte offset 12) is > 0
//      the texel is left untouched and the program returns.
//   2. Otherwise visit all eight neighbours of the 3x3 ring. Neighbour
//      coordinates are clamped to [0, resolution-1] per axis, so on a border
//      a clamped tap coincides with another tap (or with the centre).
//      For every neighbour whose 4th float is > 0, its first three floats
//      are added to a running sum and a counter is incremented.
//   3. If no neighbour qualified, return without writing. Otherwise store
//      (sum.x/count, sum.y/count, sum.z/count, 1.0) back to the centre texel.
//
// Reads : pixelID, resolution, image (through _rt_buffer_get_64).
// Writes: 16 bytes at the centre element of `image`.
//
// Accumulators: %f116/%f117/%f118 = running sum, %r79 = number of valid taps.
// The sum starts from the centre texel's own first three floats, while the
// divisor counts neighbours only.
//
// Every element access goes through
//   _rt_buffer_get_64(bufferPtr, 2, 16, x, y, 0, 0) -> element address
// NOTE(review): the constants 2 and 16 are presumably dimensionality and
// element size in bytes (consistent with the 16-byte ld.v4.f32 / st.v4.f32
// below) -- confirm against the OptiX intrinsic definition.
//
// Each neighbour is looked up twice (once for the validity test, once for the
// vector load); the duplicate lookup is kept as emitted by the compiler.
// ---------------------------------------------------------------------------
.visible .entry _Z6oxMainv(
)
{
// %p0, %f119-%f123 and %rd124-%rd125 lie inside these declared ranges and are
// used exclusively by the (x, y+1) tap.
.reg .pred %p<14>;
.reg .f32 %f<134>;
.reg .b32 %r<85>;
.reg .b64 %rd<127>;
// --- Centre texel: return early when its 4th component is already > 0 ------
ld.global.v2.u32 {%r18, %r19}, [pixelID];
cvt.u64.u32 %rd9, %r18;
cvt.u64.u32 %rd10, %r19;
mov.u64 %rd13, image;
cvta.global.u64 %rd8, %rd13;
mov.u32 %r16, 2;
mov.u32 %r17, 16;
mov.u64 %rd12, 0;
// inline asm
call (%rd7), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd9, %rd10, %rd12, %rd12);
// inline asm
ld.f32 %f46, [%rd7+12];
setp.gt.f32 %p1, %f46, 0f00000000;
@%p1 bra BB0_17;
// --- Seed the running sum with the centre texel's first three floats -------
ld.global.v2.u32 {%r27, %r28}, [pixelID];
cvt.u64.u32 %rd16, %r27;
cvt.u64.u32 %rd17, %r28;
// inline asm
call (%rd14), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd16, %rd17, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f116, %f117, %f118, %f50}, [%rd14];
// --- Clamped neighbour coordinates -----------------------------------------
//   %rd22 = (x == 0) ? 0 : x-1          %rd23 = (y == 0) ? 0 : y-1
//   %r1   = (x == res.x-1) ? res.x-1 : x+1
//   %r2   = (y == res.y-1) ? res.y-1 : y+1
ld.global.v2.u32 {%r31, %r32}, [pixelID];
setp.eq.s32 %p2, %r31, 0;
add.s32 %r34, %r31, -1;
cvt.u64.u32 %rd27, %r34;
selp.b64 %rd22, 0, %rd27, %p2;
setp.eq.s32 %p3, %r32, 0;
add.s32 %r36, %r32, -1;
cvt.u64.u32 %rd28, %r36;
selp.b64 %rd23, 0, %rd28, %p3;
ld.global.v2.u32 {%r37, %r38}, [resolution];
add.s32 %r40, %r37, -1;
setp.eq.s32 %p4, %r31, %r40;
add.s32 %r41, %r31, 1;
selp.b32 %r1, %r40, %r41, %p4;
add.s32 %r43, %r38, -1;
setp.eq.s32 %p5, %r32, %r43;
add.s32 %r44, %r32, 1;
selp.b32 %r2, %r43, %r44, %p5;
// --- Tap (x-1, y-1) --------------------------------------------------------
// inline asm
call (%rd20), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd22, %rd23, %rd12, %rd12);
// inline asm
ld.f32 %f51, [%rd20+12];
mov.u32 %r79, 0;
setp.leu.f32 %p6, %f51, 0f00000000;
@%p6 bra BB0_3;
// inline asm
call (%rd29), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd22, %rd23, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f52, %f53, %f54, %f55}, [%rd29];
add.f32 %f116, %f116, %f52;
add.f32 %f117, %f117, %f53;
add.f32 %f118, %f118, %f54;
mov.u32 %r79, 1;
BB0_3:
// --- Tap (x, y-1) ----------------------------------------------------------
// 32-bit load into a 64-bit register: %rd38 = zero-extended pixelID.x.
// %rd38 stays live and is reused by the (x, y+1) tap further down.
ld.global.u32 %rd38, [pixelID];
// inline asm
call (%rd36), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd38, %rd23, %rd12, %rd12);
// inline asm
ld.f32 %f59, [%rd36+12];
setp.leu.f32 %p7, %f59, 0f00000000;
@%p7 bra BB0_5;
// inline asm
call (%rd43), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd38, %rd23, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f60, %f61, %f62, %f63}, [%rd43];
add.f32 %f116, %f116, %f60;
add.f32 %f117, %f117, %f61;
add.f32 %f118, %f118, %f62;
add.s32 %r79, %r79, 1;
BB0_5:
// --- Tap (x+1, y-1) --------------------------------------------------------
cvt.u64.u32 %rd52, %r1;
// inline asm
call (%rd50), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd52, %rd23, %rd12, %rd12);
// inline asm
ld.f32 %f67, [%rd50+12];
setp.leu.f32 %p8, %f67, 0f00000000;
@%p8 bra BB0_7;
// inline asm
call (%rd57), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd52, %rd23, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f68, %f69, %f70, %f71}, [%rd57];
add.f32 %f116, %f116, %f68;
add.f32 %f117, %f117, %f69;
add.f32 %f118, %f118, %f70;
add.s32 %r79, %r79, 1;
BB0_7:
// --- Tap (x-1, y) ----------------------------------------------------------
// %rd67 = zero-extended pixelID.y
ld.global.u32 %rd67, [pixelID+4];
// inline asm
call (%rd64), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd22, %rd67, %rd12, %rd12);
// inline asm
ld.f32 %f75, [%rd64+12];
setp.leu.f32 %p9, %f75, 0f00000000;
@%p9 bra BB0_9;
// inline asm
call (%rd71), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd22, %rd67, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f76, %f77, %f78, %f79}, [%rd71];
add.f32 %f116, %f116, %f76;
add.f32 %f117, %f117, %f77;
add.f32 %f118, %f118, %f78;
add.s32 %r79, %r79, 1;
BB0_9:
// --- Tap (x+1, y) ----------------------------------------------------------
// inline asm
call (%rd78), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd52, %rd67, %rd12, %rd12);
// inline asm
ld.f32 %f83, [%rd78+12];
setp.leu.f32 %p10, %f83, 0f00000000;
@%p10 bra BB0_11;
// inline asm
call (%rd85), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd52, %rd67, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f84, %f85, %f86, %f87}, [%rd85];
add.f32 %f116, %f116, %f84;
add.f32 %f117, %f117, %f85;
add.f32 %f118, %f118, %f86;
add.s32 %r79, %r79, 1;
BB0_11:
// --- Tap (x-1, y+1) --------------------------------------------------------
cvt.u64.u32 %rd95, %r2;
// inline asm
call (%rd92), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd22, %rd95, %rd12, %rd12);
// inline asm
ld.f32 %f91, [%rd92+12];
setp.leu.f32 %p11, %f91, 0f00000000;
@%p11 bra BB0_13;
// inline asm
call (%rd99), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd22, %rd95, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f92, %f93, %f94, %f95}, [%rd99];
add.f32 %f116, %f116, %f92;
add.f32 %f117, %f117, %f93;
add.f32 %f118, %f118, %f94;
add.s32 %r79, %r79, 1;
BB0_13:
// --- Tap (x, y+1) ----------------------------------------------------------
// Bottom-centre neighbour; without it the 3x3 ring would cover only seven of
// its eight texels and a hole whose sole valid neighbour lies directly below
// would never be filled. Uses %rd38 (pixelID.x) and %rd95 (clamped y+1).
// inline asm
call (%rd124), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd38, %rd95, %rd12, %rd12);
// inline asm
ld.f32 %f119, [%rd124+12];
setp.leu.f32 %p0, %f119, 0f00000000;
@%p0 bra BB0_14;
// inline asm
call (%rd125), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd38, %rd95, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f120, %f121, %f122, %f123}, [%rd125];
add.f32 %f116, %f116, %f120;
add.f32 %f117, %f117, %f121;
add.f32 %f118, %f118, %f122;
add.s32 %r79, %r79, 1;
BB0_14:
// --- Tap (x+1, y+1) --------------------------------------------------------
// inline asm
call (%rd106), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd52, %rd95, %rd12, %rd12);
// inline asm
ld.f32 %f99, [%rd106+12];
setp.leu.f32 %p12, %f99, 0f00000000;
@%p12 bra BB0_15;
// inline asm
call (%rd113), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd52, %rd95, %rd12, %rd12);
// inline asm
ld.v4.f32 {%f100, %f101, %f102, %f103}, [%rd113];
add.f32 %f116, %f116, %f100;
add.f32 %f117, %f117, %f101;
add.f32 %f118, %f118, %f102;
add.s32 %r79, %r79, 1;
BB0_15:
// --- Resolve: nothing to write when no neighbour was valid ------------------
setp.eq.s32 %p13, %r79, 0;
@%p13 bra BB0_17;
// %f108 = 1 / count; the division is done as reciprocal-then-multiply.
cvt.rn.f32.u32 %f107, %r79;
rcp.rn.f32 %f108, %f107;
ld.global.v2.u32 {%r74, %r75}, [pixelID];
cvt.u64.u32 %rd122, %r74;
cvt.u64.u32 %rd123, %r75;
// inline asm
call (%rd120), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd122, %rd123, %rd12, %rd12);
// inline asm
mul.f32 %f109, %f118, %f108;
mul.f32 %f110, %f117, %f108;
mul.f32 %f111, %f116, %f108;
// 0f3F800000 == 1.0f: the filled texel is written with 4th component 1.0.
mov.f32 %f112, 0f3F800000;
st.v4.f32 [%rd120], {%f111, %f110, %f109, %f112};
BB0_17:
ret;
}