ArabDesert/Assets/Editor/x64/Bakery/addSH.ptx

667 lines
22 KiB
Plaintext

//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-23083092
// Cuda compilation tools, release 9.1, V9.1.85
// Based on LLVM 3.4svn
//
// Module header: PTX ISA version, minimum target architecture, pointer width.
.version 6.1
.target sm_30
.address_size 64
// .globl _Z6oxMainv
//
// Program variables.
// The declared type name and semantic of each one is recorded in the
// rti_internal_typename / rti_internal_semantic tables further down.
// Of this first group only pixelID is referenced by oxMain in this file;
// resolution, normal, camPos, root, imageEnabled, lightmap, tileInfo and
// additive are declared but not read or written by that entry point.
.global .align 8 .b8 pixelID[8];
.global .align 8 .b8 resolution[8];
.global .align 4 .b8 normal[12];
.global .align 4 .b8 camPos[12];
.global .align 4 .b8 root[4];
.global .align 4 .u32 imageEnabled;
.global .texref lightmap;
.global .align 16 .b8 tileInfo[16];
.global .align 4 .u32 additive;
// Buffer handles. Each is a 1-byte placeholder symbol: oxMain never loads from
// these symbols directly, it only takes their address (mov + cvta.global) and
// passes it as the first argument of _rt_buffer_get_64.
.global .align 1 .b8 inputImageL0[1];
.global .align 1 .b8 inputImageL1x[1];
.global .align 1 .b8 inputImageL1y[1];
.global .align 1 .b8 inputImageL1z[1];
.global .align 1 .b8 outputImageL0[1];
.global .align 1 .b8 outputImageL1x[1];
.global .align 1 .b8 outputImageL1y[1];
.global .align 1 .b8 outputImageL1z[1];
.global .align 1 .b8 packedImageL1x[1];
.global .align 1 .b8 packedImageL1y[1];
.global .align 1 .b8 packedImageL1z[1];
// Float switch read by oxMain: values greater than 0.5 select the 8-bit
// packedImageL1* outputs, anything else selects the half-float outputImageL1* outputs.
.global .align 4 .f32 DoPack;
// Per-variable type info records: the bytes 'R','a','y',0 followed by four
// bytes whose first one equals the variable's declared size above
// (8, 8, 12, 12, 4, 4, 16, 4, 4).
.global .align 4 .b8 _ZN21rti_internal_typeinfo7pixelIDE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo10resolutionE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo6normalE[8] = {82, 97, 121, 0, 12, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo6camPosE[8] = {82, 97, 121, 0, 12, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo4rootE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo12imageEnabledE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo8tileInfoE[8] = {82, 97, 121, 0, 16, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo8additiveE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo6DoPackE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
// rti_internal_register slots: uninitialised 64-bit and 32-bit words.
// None of them is referenced by oxMain in this file.
// NOTE(review): presumably reserved for the OptiX runtime (exception details,
// ray index) - confirm against the OptiX headers.
.global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;
// Type names as NUL-terminated ASCII. Decoded, in order:
//   pixelID "uint2", resolution "uint2", normal "float3", camPos "float3",
//   root "rtObject", imageEnabled "int", tileInfo "uint4", additive "int",
//   DoPack "float".
.global .align 8 .b8 _ZN21rti_internal_typename7pixelIDE[6] = {117, 105, 110, 116, 50, 0};
.global .align 8 .b8 _ZN21rti_internal_typename10resolutionE[6] = {117, 105, 110, 116, 50, 0};
.global .align 8 .b8 _ZN21rti_internal_typename6normalE[7] = {102, 108, 111, 97, 116, 51, 0};
.global .align 8 .b8 _ZN21rti_internal_typename6camPosE[7] = {102, 108, 111, 97, 116, 51, 0};
.global .align 16 .b8 _ZN21rti_internal_typename4rootE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0};
.global .align 4 .b8 _ZN21rti_internal_typename12imageEnabledE[4] = {105, 110, 116, 0};
.global .align 8 .b8 _ZN21rti_internal_typename8tileInfoE[6] = {117, 105, 110, 116, 52, 0};
.global .align 4 .b8 _ZN21rti_internal_typename8additiveE[4] = {105, 110, 116, 0};
.global .align 8 .b8 _ZN21rti_internal_typename6DoPackE[6] = {102, 108, 111, 97, 116, 0};
// Type enum tags: every variable carries the same value, 4919 (0x1337).
.global .align 4 .u32 _ZN21rti_internal_typeenum7pixelIDE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum10resolutionE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum6normalE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum6camPosE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum4rootE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum12imageEnabledE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum8tileInfoE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum8additiveE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum6DoPackE = 4919;
// Semantic strings as NUL-terminated ASCII. Decoded:
//   pixelID "rtLaunchIndex", resolution "rtLaunchDim", normal "attribute normal".
// The remaining variables get a 1-byte entry with no initializer.
.global .align 16 .b8 _ZN21rti_internal_semantic7pixelIDE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0};
.global .align 16 .b8 _ZN21rti_internal_semantic10resolutionE[12] = {114, 116, 76, 97, 117, 110, 99, 104, 68, 105, 109, 0};
.global .align 16 .b8 _ZN21rti_internal_semantic6normalE[17] = {97, 116, 116, 114, 105, 98, 117, 116, 101, 32, 110, 111, 114, 109, 97, 108, 0};
.global .align 1 .b8 _ZN21rti_internal_semantic6camPosE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic4rootE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic12imageEnabledE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic8tileInfoE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic8additiveE[1];
.global .align 1 .b8 _ZN21rti_internal_semantic6DoPackE[1];
// Annotation strings: a 1-byte entry with no initializer for every variable.
.global .align 1 .b8 _ZN23rti_internal_annotation7pixelIDE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation10resolutionE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation6normalE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation6camPosE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation4rootE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation12imageEnabledE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation8tileInfoE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation8additiveE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation6DoPackE[1];
// ---------------------------------------------------------------------------
// Entry point oxMain() (mangled _Z6oxMainv). Takes no kernel parameters; all
// inputs come from module-level globals.
//
// For the buffer element addressed by pixelID (x, y) this program:
//   1. reads halves 0..2 of inputImageL0, inputImageL1x/y/z, outputImageL0 and
//      outputImageL1x/y/z and widens them to f32;
//   2. maps every L1 value v to 2*v - 1;
//   3. forms  L0sum = inL0 + outL0  and
//             L1sum = inL0 * inL1' + outL0 * outL1'   (per axis, per component);
//   4. divides L1sum by max(L0sum, 2^-23) and maps the quotient q to 0.5*q + 0.5;
//   5. takes m = min(half 3 of inputImageL0, half 3 of outputImageL0) and
//      multiplies both L0sum and the values from step 4 by m;
//   6. stores {L0sum*m, m} as four halves into outputImageL0;
//   7. stores the L1 results either as four bytes per element into
//      packedImageL1x/y/z (when DoPack > 0.5) or as four halves per element
//      into outputImageL1x/y/z (otherwise), with m in the fourth slot.
//
// Every buffer access goes through _rt_buffer_get_64, which is not defined in
// this file. It is always called as (buffer, 2, elemSize, x, y, 0, 0) with
// elemSize 8 for the half4 buffers and 4 for the packed buffers, and its
// result is used as the address of the element.
// NOTE(review): argument 2 is presumably the buffer dimensionality and
// argument 3 the element size in bytes - confirm against the OptiX internals.
// NOTE(review): the buffer names suggest L0/L1 spherical-harmonic lightmap
// data with a coverage/alpha value in the fourth component - confirm against
// the CUDA source this PTX was generated from.
//
// Float literals used below: 0f40000000 = 2.0, 0fBF800000 = -1.0,
// 0f3F000000 = 0.5, 0f34000000 = 2^-23, 0f437F0000 = 255.0.
// ---------------------------------------------------------------------------
.visible .entry _Z6oxMainv(
)
{
// Virtual register declarations (predicate, 16-bit, f32, 32-bit, 64-bit).
.reg .pred %p<2>;
.reg .b16 %rs<51>;
.reg .f32 %f<132>;
.reg .b32 %r<209>;
.reg .b64 %rd<213>;
// ---- inputImageL0, halves 0..2 -> %f11, %f12, %f13 ------------------------
// pixelID is reloaded before every buffer lookup throughout this function.
ld.global.v2.u32 {%r55, %r56}, [pixelID];
cvt.u64.u32 %rd3, %r55;
cvt.u64.u32 %rd4, %r56;
// %rd2 = generic address of the inputImageL0 handle (reused further down).
mov.u64 %rd163, inputImageL0;
cvta.global.u64 %rd2, %rd163;
// %r53 = 2, %r54 = 8 and %rd162 = 0 are shared by the lookups below.
mov.u32 %r53, 2;
mov.u32 %r54, 8;
mov.u64 %rd162, 0;
// inline asm
call (%rd1), _rt_buffer_get_64, (%rd2, %r53, %r54, %rd3, %rd4, %rd162, %rd162);
// inline asm
ld.u16 %rs2, [%rd1];
// inline asm
{ cvt.f32.f16 %f11, %rs2;}
// inline asm
ld.global.v2.u32 {%r59, %r60}, [pixelID];
cvt.u64.u32 %rd9, %r59;
cvt.u64.u32 %rd10, %r60;
// inline asm
call (%rd7), _rt_buffer_get_64, (%rd2, %r53, %r54, %rd9, %rd10, %rd162, %rd162);
// inline asm
ld.u16 %rs3, [%rd7+2];
// inline asm
{ cvt.f32.f16 %f12, %rs3;}
// inline asm
ld.global.v2.u32 {%r63, %r64}, [pixelID];
cvt.u64.u32 %rd15, %r63;
cvt.u64.u32 %rd16, %r64;
// inline asm
call (%rd13), _rt_buffer_get_64, (%rd2, %r53, %r54, %rd15, %rd16, %rd162, %rd162);
// inline asm
ld.u16 %rs4, [%rd13+4];
// inline asm
{ cvt.f32.f16 %f13, %rs4;}
// inline asm
// ---- inputImageL1x, halves 0..2 -> %f14..%f16, then 2*v-1 -> %f41..%f43 ---
ld.global.v2.u32 {%r67, %r68}, [pixelID];
cvt.u64.u32 %rd21, %r67;
cvt.u64.u32 %rd22, %r68;
mov.u64 %rd164, inputImageL1x;
cvta.global.u64 %rd20, %rd164;
// inline asm
call (%rd19), _rt_buffer_get_64, (%rd20, %r53, %r54, %rd21, %rd22, %rd162, %rd162);
// inline asm
ld.u16 %rs5, [%rd19];
// inline asm
{ cvt.f32.f16 %f14, %rs5;}
// inline asm
ld.global.v2.u32 {%r71, %r72}, [pixelID];
cvt.u64.u32 %rd27, %r71;
cvt.u64.u32 %rd28, %r72;
// inline asm
call (%rd25), _rt_buffer_get_64, (%rd20, %r53, %r54, %rd27, %rd28, %rd162, %rd162);
// inline asm
ld.u16 %rs6, [%rd25+2];
// inline asm
{ cvt.f32.f16 %f15, %rs6;}
// inline asm
ld.global.v2.u32 {%r75, %r76}, [pixelID];
cvt.u64.u32 %rd33, %r75;
cvt.u64.u32 %rd34, %r76;
// inline asm
call (%rd31), _rt_buffer_get_64, (%rd20, %r53, %r54, %rd33, %rd34, %rd162, %rd162);
// inline asm
ld.u16 %rs7, [%rd31+4];
// inline asm
{ cvt.f32.f16 %f16, %rs7;}
// inline asm
// v * 2.0 + (-1.0)
fma.rn.f32 %f41, %f14, 0f40000000, 0fBF800000;
fma.rn.f32 %f42, %f15, 0f40000000, 0fBF800000;
fma.rn.f32 %f43, %f16, 0f40000000, 0fBF800000;
// ---- inputImageL1y, halves 0..2 -> %f17..%f19, then 2*v-1 -> %f44..%f46 ---
ld.global.v2.u32 {%r79, %r80}, [pixelID];
cvt.u64.u32 %rd39, %r79;
cvt.u64.u32 %rd40, %r80;
mov.u64 %rd165, inputImageL1y;
cvta.global.u64 %rd38, %rd165;
// inline asm
call (%rd37), _rt_buffer_get_64, (%rd38, %r53, %r54, %rd39, %rd40, %rd162, %rd162);
// inline asm
ld.u16 %rs8, [%rd37];
// inline asm
{ cvt.f32.f16 %f17, %rs8;}
// inline asm
ld.global.v2.u32 {%r83, %r84}, [pixelID];
cvt.u64.u32 %rd45, %r83;
cvt.u64.u32 %rd46, %r84;
// inline asm
call (%rd43), _rt_buffer_get_64, (%rd38, %r53, %r54, %rd45, %rd46, %rd162, %rd162);
// inline asm
ld.u16 %rs9, [%rd43+2];
// inline asm
{ cvt.f32.f16 %f18, %rs9;}
// inline asm
ld.global.v2.u32 {%r87, %r88}, [pixelID];
cvt.u64.u32 %rd51, %r87;
cvt.u64.u32 %rd52, %r88;
// inline asm
call (%rd49), _rt_buffer_get_64, (%rd38, %r53, %r54, %rd51, %rd52, %rd162, %rd162);
// inline asm
ld.u16 %rs10, [%rd49+4];
// inline asm
{ cvt.f32.f16 %f19, %rs10;}
// inline asm
fma.rn.f32 %f44, %f17, 0f40000000, 0fBF800000;
fma.rn.f32 %f45, %f18, 0f40000000, 0fBF800000;
fma.rn.f32 %f46, %f19, 0f40000000, 0fBF800000;
// ---- inputImageL1z, halves 0..2 -> %f20..%f22, then 2*v-1 -> %f47..%f49 ---
ld.global.v2.u32 {%r91, %r92}, [pixelID];
cvt.u64.u32 %rd57, %r91;
cvt.u64.u32 %rd58, %r92;
mov.u64 %rd166, inputImageL1z;
cvta.global.u64 %rd56, %rd166;
// inline asm
call (%rd55), _rt_buffer_get_64, (%rd56, %r53, %r54, %rd57, %rd58, %rd162, %rd162);
// inline asm
ld.u16 %rs11, [%rd55];
// inline asm
{ cvt.f32.f16 %f20, %rs11;}
// inline asm
ld.global.v2.u32 {%r95, %r96}, [pixelID];
cvt.u64.u32 %rd63, %r95;
cvt.u64.u32 %rd64, %r96;
// inline asm
call (%rd61), _rt_buffer_get_64, (%rd56, %r53, %r54, %rd63, %rd64, %rd162, %rd162);
// inline asm
ld.u16 %rs12, [%rd61+2];
// inline asm
{ cvt.f32.f16 %f21, %rs12;}
// inline asm
ld.global.v2.u32 {%r99, %r100}, [pixelID];
cvt.u64.u32 %rd69, %r99;
cvt.u64.u32 %rd70, %r100;
// inline asm
call (%rd67), _rt_buffer_get_64, (%rd56, %r53, %r54, %rd69, %rd70, %rd162, %rd162);
// inline asm
ld.u16 %rs13, [%rd67+4];
// inline asm
{ cvt.f32.f16 %f22, %rs13;}
// inline asm
fma.rn.f32 %f47, %f20, 0f40000000, 0fBF800000;
fma.rn.f32 %f48, %f21, 0f40000000, 0fBF800000;
fma.rn.f32 %f49, %f22, 0f40000000, 0fBF800000;
// ---- outputImageL0 (current contents), halves 0..2 -> %f23, %f24, %f25 ----
ld.global.v2.u32 {%r103, %r104}, [pixelID];
cvt.u64.u32 %rd75, %r103;
cvt.u64.u32 %rd76, %r104;
// %rd74 = generic address of the outputImageL0 handle (reused for the store).
mov.u64 %rd167, outputImageL0;
cvta.global.u64 %rd74, %rd167;
// inline asm
call (%rd73), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd75, %rd76, %rd162, %rd162);
// inline asm
ld.u16 %rs14, [%rd73];
// inline asm
{ cvt.f32.f16 %f23, %rs14;}
// inline asm
ld.global.v2.u32 {%r107, %r108}, [pixelID];
cvt.u64.u32 %rd81, %r107;
cvt.u64.u32 %rd82, %r108;
// inline asm
call (%rd79), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd81, %rd82, %rd162, %rd162);
// inline asm
ld.u16 %rs15, [%rd79+2];
// inline asm
{ cvt.f32.f16 %f24, %rs15;}
// inline asm
ld.global.v2.u32 {%r111, %r112}, [pixelID];
cvt.u64.u32 %rd87, %r111;
cvt.u64.u32 %rd88, %r112;
// inline asm
call (%rd85), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd87, %rd88, %rd162, %rd162);
// inline asm
ld.u16 %rs16, [%rd85+4];
// inline asm
{ cvt.f32.f16 %f25, %rs16;}
// inline asm
// ---- outputImageL1x (current contents) -> 2*v-1, times outL0 -> %f53..%f55
ld.global.v2.u32 {%r115, %r116}, [pixelID];
cvt.u64.u32 %rd93, %r115;
cvt.u64.u32 %rd94, %r116;
// %rd92 = generic address of the outputImageL1x handle (reused in BB0_1).
mov.u64 %rd168, outputImageL1x;
cvta.global.u64 %rd92, %rd168;
// inline asm
call (%rd91), _rt_buffer_get_64, (%rd92, %r53, %r54, %rd93, %rd94, %rd162, %rd162);
// inline asm
ld.u16 %rs17, [%rd91];
// inline asm
{ cvt.f32.f16 %f26, %rs17;}
// inline asm
ld.global.v2.u32 {%r119, %r120}, [pixelID];
cvt.u64.u32 %rd99, %r119;
cvt.u64.u32 %rd100, %r120;
// inline asm
call (%rd97), _rt_buffer_get_64, (%rd92, %r53, %r54, %rd99, %rd100, %rd162, %rd162);
// inline asm
ld.u16 %rs18, [%rd97+2];
// inline asm
{ cvt.f32.f16 %f27, %rs18;}
// inline asm
ld.global.v2.u32 {%r123, %r124}, [pixelID];
cvt.u64.u32 %rd105, %r123;
cvt.u64.u32 %rd106, %r124;
// inline asm
call (%rd103), _rt_buffer_get_64, (%rd92, %r53, %r54, %rd105, %rd106, %rd162, %rd162);
// inline asm
ld.u16 %rs19, [%rd103+4];
// inline asm
{ cvt.f32.f16 %f28, %rs19;}
// inline asm
fma.rn.f32 %f50, %f26, 0f40000000, 0fBF800000;
fma.rn.f32 %f51, %f27, 0f40000000, 0fBF800000;
fma.rn.f32 %f52, %f28, 0f40000000, 0fBF800000;
// Weight the remapped L1x values by the matching outputImageL0 component.
mul.f32 %f53, %f23, %f50;
mul.f32 %f54, %f24, %f51;
mul.f32 %f55, %f25, %f52;
// ---- outputImageL1y (current contents) -> 2*v-1, times outL0 -> %f59..%f61
ld.global.v2.u32 {%r127, %r128}, [pixelID];
cvt.u64.u32 %rd111, %r127;
cvt.u64.u32 %rd112, %r128;
// %rd110 = generic address of the outputImageL1y handle (reused in BB0_1).
mov.u64 %rd169, outputImageL1y;
cvta.global.u64 %rd110, %rd169;
// inline asm
call (%rd109), _rt_buffer_get_64, (%rd110, %r53, %r54, %rd111, %rd112, %rd162, %rd162);
// inline asm
ld.u16 %rs20, [%rd109];
// inline asm
{ cvt.f32.f16 %f29, %rs20;}
// inline asm
ld.global.v2.u32 {%r131, %r132}, [pixelID];
cvt.u64.u32 %rd117, %r131;
cvt.u64.u32 %rd118, %r132;
// inline asm
call (%rd115), _rt_buffer_get_64, (%rd110, %r53, %r54, %rd117, %rd118, %rd162, %rd162);
// inline asm
ld.u16 %rs21, [%rd115+2];
// inline asm
{ cvt.f32.f16 %f30, %rs21;}
// inline asm
ld.global.v2.u32 {%r135, %r136}, [pixelID];
cvt.u64.u32 %rd123, %r135;
cvt.u64.u32 %rd124, %r136;
// inline asm
call (%rd121), _rt_buffer_get_64, (%rd110, %r53, %r54, %rd123, %rd124, %rd162, %rd162);
// inline asm
ld.u16 %rs22, [%rd121+4];
// inline asm
{ cvt.f32.f16 %f31, %rs22;}
// inline asm
fma.rn.f32 %f56, %f29, 0f40000000, 0fBF800000;
fma.rn.f32 %f57, %f30, 0f40000000, 0fBF800000;
fma.rn.f32 %f58, %f31, 0f40000000, 0fBF800000;
mul.f32 %f59, %f23, %f56;
mul.f32 %f60, %f24, %f57;
mul.f32 %f61, %f25, %f58;
// ---- outputImageL1z (current contents) -> 2*v-1, times outL0 -> %f65..%f67
ld.global.v2.u32 {%r139, %r140}, [pixelID];
cvt.u64.u32 %rd129, %r139;
cvt.u64.u32 %rd130, %r140;
// %rd128 = generic address of the outputImageL1z handle (reused in BB0_1).
mov.u64 %rd170, outputImageL1z;
cvta.global.u64 %rd128, %rd170;
// inline asm
call (%rd127), _rt_buffer_get_64, (%rd128, %r53, %r54, %rd129, %rd130, %rd162, %rd162);
// inline asm
ld.u16 %rs23, [%rd127];
// inline asm
{ cvt.f32.f16 %f32, %rs23;}
// inline asm
ld.global.v2.u32 {%r143, %r144}, [pixelID];
cvt.u64.u32 %rd135, %r143;
cvt.u64.u32 %rd136, %r144;
// inline asm
call (%rd133), _rt_buffer_get_64, (%rd128, %r53, %r54, %rd135, %rd136, %rd162, %rd162);
// inline asm
ld.u16 %rs24, [%rd133+2];
// inline asm
{ cvt.f32.f16 %f33, %rs24;}
// inline asm
ld.global.v2.u32 {%r147, %r148}, [pixelID];
cvt.u64.u32 %rd141, %r147;
cvt.u64.u32 %rd142, %r148;
// inline asm
call (%rd139), _rt_buffer_get_64, (%rd128, %r53, %r54, %rd141, %rd142, %rd162, %rd162);
// inline asm
ld.u16 %rs25, [%rd139+4];
// inline asm
{ cvt.f32.f16 %f34, %rs25;}
// inline asm
fma.rn.f32 %f62, %f32, 0f40000000, 0fBF800000;
fma.rn.f32 %f63, %f33, 0f40000000, 0fBF800000;
fma.rn.f32 %f64, %f34, 0f40000000, 0fBF800000;
mul.f32 %f65, %f23, %f62;
mul.f32 %f66, %f24, %f63;
mul.f32 %f67, %f25, %f64;
// ---- fourth half (byte offset 6) of inputImageL0 -> %f35 ------------------
ld.global.v2.u32 {%r151, %r152}, [pixelID];
cvt.u64.u32 %rd147, %r151;
cvt.u64.u32 %rd148, %r152;
// inline asm
call (%rd145), _rt_buffer_get_64, (%rd2, %r53, %r54, %rd147, %rd148, %rd162, %rd162);
// inline asm
ld.u16 %rs26, [%rd145+6];
// inline asm
{ cvt.f32.f16 %f35, %rs26;}
// inline asm
// ---- fourth half (byte offset 6) of outputImageL0 -> %f36 -----------------
ld.global.v2.u32 {%r155, %r156}, [pixelID];
cvt.u64.u32 %rd153, %r155;
cvt.u64.u32 %rd154, %r156;
// inline asm
call (%rd151), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd153, %rd154, %rd162, %rd162);
// inline asm
ld.u16 %rs27, [%rd151+6];
// inline asm
{ cvt.f32.f16 %f36, %rs27;}
// inline asm
// ---- combine ---------------------------------------------------------------
// m = min of the two fourth components.
min.f32 %f40, %f35, %f36;
// L0sum = inL0 + outL0, per component.
add.f32 %f68, %f11, %f23;
add.f32 %f69, %f12, %f24;
add.f32 %f70, %f13, %f25;
// L1sum = inL0 * inL1' + (outL0 * outL1'), for x (%f71..73), y (%f74..76),
// z (%f77..79).
fma.rn.f32 %f71, %f11, %f41, %f53;
fma.rn.f32 %f72, %f12, %f42, %f54;
fma.rn.f32 %f73, %f13, %f43, %f55;
fma.rn.f32 %f74, %f11, %f44, %f59;
fma.rn.f32 %f75, %f12, %f45, %f60;
fma.rn.f32 %f76, %f13, %f46, %f61;
fma.rn.f32 %f77, %f11, %f47, %f65;
fma.rn.f32 %f78, %f12, %f48, %f66;
fma.rn.f32 %f79, %f13, %f49, %f67;
// Divisor = max(L0sum, 2^-23), so the divisions below never see a value
// smaller than 2^-23.
mov.f32 %f80, 0f34000000;
max.f32 %f81, %f68, %f80;
max.f32 %f82, %f69, %f80;
max.f32 %f83, %f70, %f80;
// x axis: q = L1sum / divisor, then q * 0.5 + 0.5.
div.rn.f32 %f84, %f71, %f81;
div.rn.f32 %f85, %f72, %f82;
div.rn.f32 %f86, %f73, %f83;
fma.rn.f32 %f87, %f84, 0f3F000000, 0f3F000000;
fma.rn.f32 %f88, %f85, 0f3F000000, 0f3F000000;
fma.rn.f32 %f89, %f86, 0f3F000000, 0f3F000000;
// y axis.
div.rn.f32 %f90, %f74, %f81;
div.rn.f32 %f91, %f75, %f82;
div.rn.f32 %f92, %f76, %f83;
fma.rn.f32 %f93, %f90, 0f3F000000, 0f3F000000;
fma.rn.f32 %f94, %f91, 0f3F000000, 0f3F000000;
fma.rn.f32 %f95, %f92, 0f3F000000, 0f3F000000;
// z axis.
div.rn.f32 %f96, %f77, %f81;
div.rn.f32 %f97, %f78, %f82;
div.rn.f32 %f98, %f79, %f83;
fma.rn.f32 %f99, %f96, 0f3F000000, 0f3F000000;
fma.rn.f32 %f100, %f97, 0f3F000000, 0f3F000000;
fma.rn.f32 %f101, %f98, 0f3F000000, 0f3F000000;
// Scale everything by m: L0sum -> %f37..%f39 (the unclamped sum, not the
// divisor), re-encoded L1 x/y/z -> %f2..%f4 / %f5..%f7 / %f8..%f10.
mul.f32 %f37, %f68, %f40;
mul.f32 %f38, %f69, %f40;
mul.f32 %f39, %f70, %f40;
mul.f32 %f2, %f40, %f87;
mul.f32 %f3, %f40, %f88;
mul.f32 %f4, %f40, %f89;
mul.f32 %f5, %f40, %f93;
mul.f32 %f6, %f40, %f94;
mul.f32 %f7, %f40, %f95;
mul.f32 %f8, %f40, %f99;
mul.f32 %f9, %f40, %f100;
mul.f32 %f10, %f40, %f101;
// ---- store {L0sum*m (3 halves), m} into outputImageL0 ----------------------
ld.global.v2.u32 {%r159, %r160}, [pixelID];
cvt.u64.u32 %rd159, %r159;
cvt.u64.u32 %rd160, %r160;
// inline asm
call (%rd157), _rt_buffer_get_64, (%rd74, %r53, %r54, %rd159, %rd160, %rd162, %rd162);
// inline asm
// inline asm
// %rs31 = m as a half; it is reused as the fourth component in BB0_1.
{ cvt.rn.f16.f32 %rs31, %f40;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs30, %f39;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs29, %f38;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs28, %f37;}
// inline asm
st.v4.u16 [%rd157], {%rs28, %rs29, %rs30, %rs31};
// ---- choose the L1 output format: DoPack > 0.5 -> BB0_2, else BB0_1 --------
ld.global.f32 %f102, [DoPack];
setp.gt.f32 %p1, %f102, 0f3F000000;
@%p1 bra BB0_2;
bra.uni BB0_1;
// ---- packed path: 4 bytes per element into packedImageL1x/y/z --------------
BB0_2:
// Scale the nine L1 results by 255.0 and cap each at 255.0 (upper bound only).
mul.f32 %f112, %f2, 0f437F0000;
mov.f32 %f113, 0f437F0000;
min.f32 %f114, %f112, %f113;
mul.f32 %f115, %f3, 0f437F0000;
min.f32 %f116, %f115, %f113;
mul.f32 %f117, %f4, 0f437F0000;
min.f32 %f118, %f117, %f113;
mul.f32 %f119, %f5, 0f437F0000;
min.f32 %f120, %f119, %f113;
mul.f32 %f121, %f6, 0f437F0000;
min.f32 %f122, %f121, %f113;
mul.f32 %f123, %f7, 0f437F0000;
min.f32 %f124, %f123, %f113;
mul.f32 %f125, %f8, 0f437F0000;
min.f32 %f126, %f125, %f113;
mul.f32 %f127, %f9, 0f437F0000;
min.f32 %f128, %f127, %f113;
mul.f32 %f129, %f10, 0f437F0000;
min.f32 %f130, %f129, %f113;
ld.global.v2.u32 {%r187, %r188}, [pixelID];
cvt.u64.u32 %rd194, %r187;
cvt.u64.u32 %rd195, %r188;
mov.u64 %rd210, packedImageL1x;
cvta.global.u64 %rd193, %rd210;
// Third lookup argument is 4 here (8 was used for the half4 buffers).
mov.u32 %r186, 4;
// inline asm
call (%rd192), _rt_buffer_get_64, (%rd193, %r53, %r186, %rd194, %rd195, %rd162, %rd162);
// inline asm
// Convert to unsigned integers, rounding toward zero.
cvt.rzi.u32.f32 %r191, %f114;
cvt.rzi.u32.f32 %r192, %f116;
cvt.rzi.u32.f32 %r193, %f118;
// m * 255.0 for the fourth byte.
// NOTE(review): unlike the L1 values above, this product is not passed
// through min(.., 255.0) before conversion; if m can exceed 1.0 the byte
// written by st.v4.u8 would presumably wrap - confirm m is bounded upstream.
mul.f32 %f131, %f40, 0f437F0000;
cvt.rzi.u32.f32 %r194, %f131;
cvt.u16.u32 %rs41, %r193;
cvt.u16.u32 %rs42, %r192;
cvt.u16.u32 %rs43, %r191;
// %rs44 (m * 255) is shared by all three packed stores.
cvt.u16.u32 %rs44, %r194;
st.v4.u8 [%rd192], {%rs43, %rs42, %rs41, %rs44};
// packedImageL1y
ld.global.v2.u32 {%r195, %r196}, [pixelID];
cvt.u64.u32 %rd200, %r195;
cvt.u64.u32 %rd201, %r196;
mov.u64 %rd211, packedImageL1y;
cvta.global.u64 %rd199, %rd211;
// inline asm
call (%rd198), _rt_buffer_get_64, (%rd199, %r53, %r186, %rd200, %rd201, %rd162, %rd162);
// inline asm
cvt.rzi.u32.f32 %r199, %f120;
cvt.rzi.u32.f32 %r200, %f122;
cvt.rzi.u32.f32 %r201, %f124;
cvt.u16.u32 %rs45, %r201;
cvt.u16.u32 %rs46, %r200;
cvt.u16.u32 %rs47, %r199;
st.v4.u8 [%rd198], {%rs47, %rs46, %rs45, %rs44};
// packedImageL1z
ld.global.v2.u32 {%r202, %r203}, [pixelID];
cvt.u64.u32 %rd206, %r202;
cvt.u64.u32 %rd207, %r203;
mov.u64 %rd212, packedImageL1z;
cvta.global.u64 %rd205, %rd212;
// inline asm
call (%rd204), _rt_buffer_get_64, (%rd205, %r53, %r186, %rd206, %rd207, %rd162, %rd162);
// inline asm
cvt.rzi.u32.f32 %r206, %f126;
cvt.rzi.u32.f32 %r207, %f128;
cvt.rzi.u32.f32 %r208, %f130;
cvt.u16.u32 %rs48, %r208;
cvt.u16.u32 %rs49, %r207;
cvt.u16.u32 %rs50, %r206;
st.v4.u8 [%rd204], {%rs50, %rs49, %rs48, %rs44};
bra.uni BB0_3;
// ---- unpacked path: 4 halves per element into outputImageL1x/y/z -----------
BB0_1:
// outputImageL1x: saturate each value to [0, 1], convert to half, store with
// m (%rs31) as the fourth component.
ld.global.v2.u32 {%r169, %r170}, [pixelID];
cvt.u64.u32 %rd173, %r169;
cvt.u64.u32 %rd174, %r170;
// inline asm
call (%rd171), _rt_buffer_get_64, (%rd92, %r53, %r54, %rd173, %rd174, %rd162, %rd162);
// inline asm
cvt.sat.f32.f32 %f103, %f2;
cvt.sat.f32.f32 %f104, %f3;
cvt.sat.f32.f32 %f105, %f4;
// inline asm
{ cvt.rn.f16.f32 %rs34, %f105;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs33, %f104;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs32, %f103;}
// inline asm
st.v4.u16 [%rd171], {%rs32, %rs33, %rs34, %rs31};
// outputImageL1y
ld.global.v2.u32 {%r173, %r174}, [pixelID];
cvt.u64.u32 %rd179, %r173;
cvt.u64.u32 %rd180, %r174;
// inline asm
call (%rd177), _rt_buffer_get_64, (%rd110, %r53, %r54, %rd179, %rd180, %rd162, %rd162);
// inline asm
cvt.sat.f32.f32 %f106, %f5;
cvt.sat.f32.f32 %f107, %f6;
cvt.sat.f32.f32 %f108, %f7;
// inline asm
{ cvt.rn.f16.f32 %rs37, %f108;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs36, %f107;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs35, %f106;}
// inline asm
st.v4.u16 [%rd177], {%rs35, %rs36, %rs37, %rs31};
// outputImageL1z
ld.global.v2.u32 {%r177, %r178}, [pixelID];
cvt.u64.u32 %rd185, %r177;
cvt.u64.u32 %rd186, %r178;
// inline asm
call (%rd183), _rt_buffer_get_64, (%rd128, %r53, %r54, %rd185, %rd186, %rd162, %rd162);
// inline asm
cvt.sat.f32.f32 %f109, %f8;
cvt.sat.f32.f32 %f110, %f9;
cvt.sat.f32.f32 %f111, %f10;
// inline asm
{ cvt.rn.f16.f32 %rs40, %f111;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs39, %f110;}
// inline asm
// inline asm
{ cvt.rn.f16.f32 %rs38, %f109;}
// inline asm
st.v4.u16 [%rd183], {%rs38, %rs39, %rs40, %rs31};
// Common exit for both paths.
BB0_3:
ret;
}