// File: ArabDesert/Assets/Editor/x64/Bakery/denoisePrepareSH.ptx
// Repository viewer metadata: 596 lines, 18 KiB, Plaintext

//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-23083092
// Cuda compilation tools, release 9.1, V9.1.85
// Based on LLVM 3.4svn
//
// Module header: PTX ISA version, target architecture and address size.
.version 6.1
.target sm_30
.address_size 64
// .globl _Z6oxMainv
// Variables referenced by the entry point _Z6oxMainv in this file:
//   pixelID      - 8 bytes, loaded as two u32 values and used as buffer indices
//   mode         - u32, compared (signed) against 0 to pick the input path
//   input_buffer - 1-byte placeholder; its address is passed to _rt_buffer_get_64
//   image        - 1-byte placeholder; its address is passed to _rt_buffer_get_64
// `resolution` is declared here but not referenced by the entry body in this file.
.global .align 8 .b8 pixelID[8];
.global .align 8 .b8 resolution[8];
.global .align 1 .b8 input_buffer[1];
.global .align 1 .b8 image[1];
.global .align 4 .u32 mode;
// Type-info records. The first four bytes are ASCII "Ray\0"; the following
// bytes are 8,0,0,0 for pixelID/resolution and 4,0,0,0 for mode.
// NOTE(review): presumably a little-endian byte size (it matches the 8-byte and
// 4-byte declarations above) consumed by the OptiX runtime -- confirm.
.global .align 4 .b8 _ZN21rti_internal_typeinfo7pixelIDE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo10resolutionE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
.global .align 4 .b8 _ZN21rti_internal_typeinfo4modeE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
// rti_internal_register variables. None of them is referenced by the entry body
// in this file. NOTE(review): presumably reserved for the OptiX runtime -- confirm.
.global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;
// Type-name strings: the bytes decode to ASCII "uint2\0" (pixelID, resolution)
// and "int\0" (mode).
.global .align 8 .b8 _ZN21rti_internal_typename7pixelIDE[6] = {117, 105, 110, 116, 50, 0};
.global .align 8 .b8 _ZN21rti_internal_typename10resolutionE[6] = {117, 105, 110, 116, 50, 0};
.global .align 4 .b8 _ZN21rti_internal_typename4modeE[4] = {105, 110, 116, 0};
// Type-enum tags: all three hold 4919 (0x1337).
.global .align 4 .u32 _ZN21rti_internal_typeenum7pixelIDE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum10resolutionE = 4919;
.global .align 4 .u32 _ZN21rti_internal_typeenum4modeE = 4919;
// Semantic strings: the bytes decode to ASCII "rtLaunchIndex\0" (pixelID) and
// "rtLaunchDim\0" (resolution); mode has an uninitialized 1-byte semantic.
.global .align 16 .b8 _ZN21rti_internal_semantic7pixelIDE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0};
.global .align 16 .b8 _ZN21rti_internal_semantic10resolutionE[12] = {114, 116, 76, 97, 117, 110, 99, 104, 68, 105, 109, 0};
.global .align 1 .b8 _ZN21rti_internal_semantic4modeE[1];
// Annotation placeholders: 1 byte each, no initializer.
.global .align 1 .b8 _ZN23rti_internal_annotation7pixelIDE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation10resolutionE[1];
.global .align 1 .b8 _ZN23rti_internal_annotation4modeE[1];
// Entry point _Z6oxMainv (no kernel parameters; all inputs are module globals).
//
// What the body does, as visible below:
//   1. Reads `mode` and `pixelID`.
//   2. mode > 0 : loads three f32 values (byte offsets 0, 4, 8) from the
//                 input_buffer element addressed by pixelID.
//      mode <= 0: loads three f16 values (byte offsets 0, 2, 4) from the image
//                 element addressed by pixelID, widens them to f32, multiplies
//                 each by 4.0 and scales by 1 / (max of the three + 1.0).
//   3. Pushes each of the three channels (%f315, %f316, %f317) through the same
//      straight-line sequence built around the constant 0f3EE8BA2E (~0.4545),
//      then clamps the result to [0, 1] with cvt.sat.
//      NOTE(review): the sequence looks like the compiler's inlined expansion of
//      powf(x, 1/2.2) -- confirm against the CUDA source.
//   4. Stores {ch0, ch1, ch2, 1.0} as one v4.f32 into the input_buffer element
//      addressed by pixelID.
//
// _rt_buffer_get_64 is defined outside this file. It is called with
// (buffer address, 2, element-size-like constant, pixelID.x, pixelID.y, 0, 0) and
// its result is used as a pointer.
// NOTE(review): the "2" is presumably the buffer dimensionality and 16 / 8 the
// element size in bytes -- confirm against the OptiX headers.
.visible .entry _Z6oxMainv(
)
{
.reg .pred %p<53>;
.reg .b16 %rs<4>;
.reg .f32 %f<327>;
.reg .b32 %r<67>;
.reg .b64 %rd<48>;
// %p4 = (mode > 0), signed compare; selects the input path below.
ld.global.u32 %r1, [mode];
setp.gt.s32 %p4, %r1, 0;
// %rd1/%rd2 = pixelID components zero-extended to 64 bits (buffer indices).
ld.global.v2.u32 {%r2, %r3}, [pixelID];
cvt.u64.u32 %rd1, %r2;
cvt.u64.u32 %rd2, %r3;
@%p4 bra BB0_2;
bra.uni BB0_1;
// ---- mode > 0: read three f32 channels from input_buffer -------------------
BB0_2:
mov.u64 %rd40, input_buffer;
cvta.global.u64 %rd23, %rd40;
mov.u32 %r24, 2;
mov.u32 %r25, 16;
mov.u64 %rd39, 0;
// inline asm
call (%rd22), _rt_buffer_get_64, (%rd23, %r24, %r25, %rd1, %rd2, %rd39, %rd39);
// inline asm
// Channel 0 at byte offset 0.
ld.f32 %f315, [%rd22];
// pixelID is reloaded and the element address re-fetched for each channel.
ld.global.v2.u32 {%r26, %r27}, [pixelID];
cvt.u64.u32 %rd30, %r26;
cvt.u64.u32 %rd31, %r27;
// inline asm
call (%rd28), _rt_buffer_get_64, (%rd23, %r24, %r25, %rd30, %rd31, %rd39, %rd39);
// inline asm
// Channel 1 at byte offset 4.
ld.f32 %f316, [%rd28+4];
ld.global.v2.u32 {%r30, %r31}, [pixelID];
cvt.u64.u32 %rd36, %r30;
cvt.u64.u32 %rd37, %r31;
// inline asm
call (%rd34), _rt_buffer_get_64, (%rd23, %r24, %r25, %rd36, %rd37, %rd39, %rd39);
// inline asm
// Channel 2 at byte offset 8.
ld.f32 %f317, [%rd34+8];
bra.uni BB0_3;
// ---- mode <= 0: read three f16 channels from image and compress them -------
BB0_1:
mov.u64 %rd21, image;
cvta.global.u64 %rd4, %rd21;
mov.u32 %r10, 2;
mov.u32 %r11, 8;
mov.u64 %rd20, 0;
// inline asm
call (%rd3), _rt_buffer_get_64, (%rd4, %r10, %r11, %rd1, %rd2, %rd20, %rd20);
// inline asm
// Channel 0: 16-bit load at byte offset 0, converted f16 -> f32.
ld.u16 %rs1, [%rd3];
// inline asm
{ cvt.f32.f16 %f49, %rs1;}
// inline asm
ld.global.v2.u32 {%r12, %r13}, [pixelID];
cvt.u64.u32 %rd11, %r12;
cvt.u64.u32 %rd12, %r13;
// inline asm
call (%rd9), _rt_buffer_get_64, (%rd4, %r10, %r11, %rd11, %rd12, %rd20, %rd20);
// inline asm
// Channel 1: 16-bit load at byte offset 2.
ld.u16 %rs2, [%rd9+2];
// inline asm
{ cvt.f32.f16 %f50, %rs2;}
// inline asm
ld.global.v2.u32 {%r16, %r17}, [pixelID];
cvt.u64.u32 %rd17, %r16;
cvt.u64.u32 %rd18, %r17;
// inline asm
call (%rd15), _rt_buffer_get_64, (%rd4, %r10, %r11, %rd17, %rd18, %rd20, %rd20);
// inline asm
// Channel 2: 16-bit load at byte offset 4.
ld.u16 %rs3, [%rd15+4];
// inline asm
{ cvt.f32.f16 %f51, %rs3;}
// inline asm
// Scale each channel by 4.0 (0f40800000), then multiply all three by
// 1 / (max(ch0, ch1, ch2) + 1.0), i.e. c' = 4c / (max(4c) + 1).
mul.f32 %f52, %f49, 0f40800000;
mul.f32 %f53, %f50, 0f40800000;
mul.f32 %f54, %f51, 0f40800000;
max.f32 %f55, %f52, %f53;
max.f32 %f56, %f55, %f54;
add.f32 %f57, %f56, 0f3F800000;
rcp.rn.f32 %f58, %f57;
mul.f32 %f315, %f52, %f58;
mul.f32 %f316, %f53, %f58;
mul.f32 %f317, %f54, %f58;
// ---- Channel 0 (%f315): pow-like expansion with exponent 0f3EE8BA2E --------
BB0_3:
// %f10 = |y - 2*trunc(y/2)| with y = 0f3EE8BA2E (~0.4545) and y/2 = 0f3E68BA2E.
// It is compared against 1.0 later (%p12); with these constants trunc(y/2) is 0,
// so %f10 == ~0.4545 and %p12 evaluates to false.
// NOTE(review): presumably the "exponent is an odd integer" test of powf.
mov.f32 %f61, 0f3E68BA2E;
cvt.rzi.f32.f32 %f62, %f61;
fma.rn.f32 %f63, %f62, 0fC0000000, 0f3EE8BA2E;
abs.f32 %f10, %f63;
// %f11 = |x|. Inputs below the smallest normal (0f00800000) are pre-scaled by
// 2^24 (0f4B800000) and use exponent bias -151 instead of -127.
abs.f32 %f11, %f315;
setp.lt.f32 %p5, %f11, 0f00800000;
mul.f32 %f64, %f11, 0f4B800000;
selp.f32 %f65, 0fC3170000, 0fC2FE0000, %p5;
selp.f32 %f66, %f64, %f11, %p5;
// Split into mantissa (%f67, exponent field forced to that of 1.0) and
// unbiased exponent (%f69, from bits 23 and up).
mov.b32 %r34, %f66;
and.b32 %r35, %r34, 8388607;
or.b32 %r36, %r35, 1065353216;
mov.b32 %f67, %r36;
shr.u32 %r37, %r34, 23;
cvt.rn.f32.u32 %f68, %r37;
add.f32 %f69, %f65, %f68;
// If mantissa > sqrt(2) (0f3FB504F3): halve it and bump the exponent by one.
setp.gt.f32 %p6, %f67, 0f3FB504F3;
mul.f32 %f70, %f67, 0f3F000000;
add.f32 %f71, %f69, 0f3F800000;
selp.f32 %f72, %f70, %f67, %p6;
selp.f32 %f73, %f71, %f69, %p6;
// %f76 = 2*(m-1)/(m+1) using an approximate reciprocal of (m+1).
add.f32 %f74, %f72, 0fBF800000;
add.f32 %f60, %f72, 0f3F800000;
// inline asm
rcp.approx.ftz.f32 %f59,%f60;
// inline asm
add.f32 %f75, %f74, %f74;
mul.f32 %f76, %f59, %f75;
mul.f32 %f77, %f76, %f76;
// Polynomial in %f77 (= %f76^2), evaluated with fma, then multiplied by %f77*%f76.
mov.f32 %f78, 0f3C4CAF63;
mov.f32 %f79, 0f3B18F0FE;
fma.rn.f32 %f80, %f79, %f77, %f78;
mov.f32 %f81, 0f3DAAAABD;
fma.rn.f32 %f82, %f80, %f77, %f81;
mul.rn.f32 %f83, %f82, %f77;
mul.rn.f32 %f84, %f83, %f76;
// Correction term for the approximate reciprocal, followed by a chain of
// add/sub pairs that keeps a high part and a low (rounding-error) part.
sub.f32 %f85, %f74, %f76;
neg.f32 %f86, %f76;
add.f32 %f87, %f85, %f85;
fma.rn.f32 %f88, %f86, %f74, %f87;
mul.rn.f32 %f89, %f59, %f88;
add.f32 %f90, %f84, %f76;
sub.f32 %f91, %f76, %f90;
add.f32 %f92, %f84, %f91;
add.f32 %f93, %f89, %f92;
add.f32 %f94, %f90, %f93;
sub.f32 %f95, %f90, %f94;
add.f32 %f96, %f93, %f95;
// exponent * (0f3F317200 + 0f35BFBE8E); the two constants sum to ~ln(2).
mov.f32 %f97, 0f3F317200;
mul.rn.f32 %f98, %f73, %f97;
mov.f32 %f99, 0f35BFBE8E;
mul.rn.f32 %f100, %f73, %f99;
add.f32 %f101, %f98, %f94;
sub.f32 %f102, %f98, %f101;
add.f32 %f103, %f94, %f102;
add.f32 %f104, %f96, %f103;
add.f32 %f105, %f100, %f104;
// %f106 (high) + %f108 (low): NOTE(review) appears to be ln|x| in two parts.
add.f32 %f106, %f101, %f105;
sub.f32 %f107, %f101, %f106;
add.f32 %f108, %f105, %f107;
// Multiply the two-part value by the exponent constant %f109 = 0f3EE8BA2E.
// %f109 is reused by the channel 1 and channel 2 sequences further down.
mov.f32 %f109, 0f3EE8BA2E;
mul.rn.f32 %f110, %f109, %f106;
neg.f32 %f111, %f110;
fma.rn.f32 %f112, %f109, %f106, %f111;
fma.rn.f32 %f113, %f109, %f108, %f112;
mov.f32 %f114, 0f00000000;
fma.rn.f32 %f115, %f114, %f106, %f113;
add.rn.f32 %f116, %f110, %f115;
neg.f32 %f117, %f116;
add.rn.f32 %f118, %f110, %f117;
add.rn.f32 %f119, %f118, %f115;
// If the product's bit pattern equals 1118925336 (0x42B17218, ~88.7228), step
// it down by one ulp and add 0f37000000 (2^-17) to the low part instead.
mov.b32 %r38, %f116;
setp.eq.s32 %p7, %r38, 1118925336;
add.s32 %r39, %r38, -1;
mov.b32 %f120, %r39;
add.f32 %f121, %f119, 0f37000000;
selp.f32 %f122, %f120, %f116, %p7;
selp.f32 %f12, %f121, %f119, %p7;
// Exponential of %f122 built from two ex2 calls: integer part %f124 and the
// remainder after subtracting %f124 * ln(2). 0f3FB8AA3B is ~log2(e).
// %f127 (0fB5BFBE8E) is reused by the channel 1 and channel 2 sequences.
mul.f32 %f123, %f122, 0f3FB8AA3B;
cvt.rzi.f32.f32 %f124, %f123;
mov.f32 %f125, 0fBF317200;
fma.rn.f32 %f126, %f124, %f125, %f122;
mov.f32 %f127, 0fB5BFBE8E;
fma.rn.f32 %f128, %f124, %f127, %f126;
mul.f32 %f129, %f128, 0f3FB8AA3B;
ex2.approx.ftz.f32 %f130, %f129;
add.f32 %f131, %f124, 0f00000000;
ex2.approx.f32 %f132, %f131;
mul.f32 %f133, %f130, %f132;
// Clamp: argument < -105.0 gives 0, argument > 105.0 gives +inf.
setp.lt.f32 %p8, %f122, 0fC2D20000;
selp.f32 %f134, 0f00000000, %f133, %p8;
setp.gt.f32 %p9, %f122, 0f42D20000;
selp.f32 %f318, 0f7F800000, %f134, %p9;
// Unless the result is +inf, fold in the low part: r = r + r * %f12.
setp.eq.f32 %p10, %f318, 0f7F800000;
@%p10 bra BB0_5;
fma.rn.f32 %f318, %f318, %f12, %f318;
BB0_5:
// Special cases on the sign and value of the input x = %f315.
// %p12 = (%f10 == 1.0) is reused by the channel 1 and channel 2 sequences.
// %p1 = (x < 0) && %p12; when set, the result's sign bit is flipped.
setp.lt.f32 %p11, %f315, 0f00000000;
setp.eq.f32 %p12, %f10, 0f3F800000;
and.pred %p1, %p11, %p12;
mov.b32 %r40, %f318;
xor.b32 %r41, %r40, -2147483648;
mov.b32 %f135, %r41;
selp.f32 %f320, %f135, %f318, %p1;
setp.eq.f32 %p13, %f315, 0f00000000;
@%p13 bra BB0_8;
bra.uni BB0_6;
BB0_8:
// x == 0: result is (x + x) if %p12, otherwise 0.0.
add.f32 %f138, %f315, %f315;
selp.f32 %f320, %f138, 0f00000000, %p12;
bra.uni BB0_9;
BB0_6:
// x >= 0 or unordered: keep the computed result.
setp.geu.f32 %p14, %f315, 0f00000000;
@%p14 bra BB0_9;
// x < 0: if trunc(exponent) != exponent the result becomes NaN (0f7FFFFFFF).
cvt.rzi.f32.f32 %f137, %f109;
setp.neu.f32 %p15, %f137, 0f3EE8BA2E;
selp.f32 %f320, 0f7FFFFFFF, %f320, %p15;
BB0_9:
// If the bit pattern of (|x| + exponent) is below 0x7F800000 (+inf) as a
// signed integer, skip the NaN / infinity handling.
add.f32 %f139, %f11, 0f3EE8BA2E;
mov.b32 %r42, %f139;
setp.lt.s32 %p17, %r42, 2139095040;
@%p17 bra BB0_14;
setp.gtu.f32 %p18, %f11, 0f7F800000;
@%p18 bra BB0_13;
bra.uni BB0_11;
BB0_13:
// |x| compares greater-than-or-unordered with +inf: result = x + exponent.
add.f32 %f320, %f315, 0f3EE8BA2E;
bra.uni BB0_14;
BB0_11:
// |x| == +inf: result is -inf if %p1, otherwise +inf.
setp.neu.f32 %p19, %f11, 0f7F800000;
@%p19 bra BB0_14;
selp.f32 %f320, 0fFF800000, 0f7F800000, %p1;
// ---- Channel 0 finish; channel 1 (%f316) runs the same sequence ------------
BB0_14:
// Constants re-materialized for the channel 1 sequence.
mov.f32 %f307, 0fBF317200;
mov.f32 %f306, 0f00000000;
mov.f32 %f305, 0f35BFBE8E;
mov.f32 %f304, 0f3F317200;
mov.f32 %f303, 0f3DAAAABD;
mov.f32 %f302, 0f3C4CAF63;
mov.f32 %f301, 0f3B18F0FE;
// Channel 0 output: exactly 1.0 when x == 1.0, then clamped to [0, 1] -> %f23.
setp.eq.f32 %p20, %f315, 0f3F800000;
selp.f32 %f142, 0f3F800000, %f320, %p20;
cvt.sat.f32.f32 %f23, %f142;
// Channel 1: |x|, small-input pre-scaling, mantissa / exponent split.
abs.f32 %f24, %f316;
setp.lt.f32 %p21, %f24, 0f00800000;
mul.f32 %f143, %f24, 0f4B800000;
selp.f32 %f144, 0fC3170000, 0fC2FE0000, %p21;
selp.f32 %f145, %f143, %f24, %p21;
mov.b32 %r43, %f145;
and.b32 %r44, %r43, 8388607;
or.b32 %r45, %r44, 1065353216;
mov.b32 %f146, %r45;
shr.u32 %r46, %r43, 23;
cvt.rn.f32.u32 %f147, %r46;
add.f32 %f148, %f144, %f147;
setp.gt.f32 %p22, %f146, 0f3FB504F3;
mul.f32 %f149, %f146, 0f3F000000;
add.f32 %f150, %f148, 0f3F800000;
selp.f32 %f151, %f149, %f146, %p22;
selp.f32 %f152, %f150, %f148, %p22;
add.f32 %f153, %f151, 0fBF800000;
add.f32 %f141, %f151, 0f3F800000;
// inline asm
rcp.approx.ftz.f32 %f140,%f141;
// inline asm
// Channel 1: 2*(m-1)/(m+1), polynomial, and high/low accumulation.
add.f32 %f154, %f153, %f153;
mul.f32 %f155, %f140, %f154;
mul.f32 %f156, %f155, %f155;
fma.rn.f32 %f159, %f301, %f156, %f302;
fma.rn.f32 %f161, %f159, %f156, %f303;
mul.rn.f32 %f162, %f161, %f156;
mul.rn.f32 %f163, %f162, %f155;
sub.f32 %f164, %f153, %f155;
neg.f32 %f165, %f155;
add.f32 %f166, %f164, %f164;
fma.rn.f32 %f167, %f165, %f153, %f166;
mul.rn.f32 %f168, %f140, %f167;
add.f32 %f169, %f163, %f155;
sub.f32 %f170, %f155, %f169;
add.f32 %f171, %f163, %f170;
add.f32 %f172, %f168, %f171;
add.f32 %f173, %f169, %f172;
sub.f32 %f174, %f169, %f173;
add.f32 %f175, %f172, %f174;
mul.rn.f32 %f177, %f152, %f304;
mul.rn.f32 %f179, %f152, %f305;
add.f32 %f180, %f177, %f173;
sub.f32 %f181, %f177, %f180;
add.f32 %f182, %f173, %f181;
add.f32 %f183, %f175, %f182;
add.f32 %f184, %f179, %f183;
add.f32 %f185, %f180, %f184;
sub.f32 %f186, %f180, %f185;
add.f32 %f187, %f184, %f186;
// Channel 1: multiply by the exponent constant (%f109, set in the channel 0 code).
mul.rn.f32 %f189, %f109, %f185;
neg.f32 %f190, %f189;
fma.rn.f32 %f191, %f109, %f185, %f190;
fma.rn.f32 %f192, %f109, %f187, %f191;
fma.rn.f32 %f194, %f306, %f185, %f192;
add.rn.f32 %f195, %f189, %f194;
neg.f32 %f196, %f195;
add.rn.f32 %f197, %f189, %f196;
add.rn.f32 %f198, %f197, %f194;
mov.b32 %r47, %f195;
setp.eq.s32 %p23, %r47, 1118925336;
add.s32 %r48, %r47, -1;
mov.b32 %f199, %r48;
add.f32 %f200, %f198, 0f37000000;
selp.f32 %f201, %f199, %f195, %p23;
selp.f32 %f25, %f200, %f198, %p23;
// Channel 1: exponential via ex2 (%f127 comes from the channel 0 code), clamped
// to 0 below -105.0 and +inf above 105.0.
mul.f32 %f202, %f201, 0f3FB8AA3B;
cvt.rzi.f32.f32 %f203, %f202;
fma.rn.f32 %f205, %f203, %f307, %f201;
fma.rn.f32 %f207, %f203, %f127, %f205;
mul.f32 %f208, %f207, 0f3FB8AA3B;
ex2.approx.ftz.f32 %f209, %f208;
add.f32 %f210, %f203, 0f00000000;
ex2.approx.f32 %f211, %f210;
mul.f32 %f212, %f209, %f211;
setp.lt.f32 %p24, %f201, 0fC2D20000;
selp.f32 %f213, 0f00000000, %f212, %p24;
setp.gt.f32 %p25, %f201, 0f42D20000;
selp.f32 %f321, 0f7F800000, %f213, %p25;
setp.eq.f32 %p26, %f321, 0f7F800000;
@%p26 bra BB0_16;
fma.rn.f32 %f321, %f321, %f25, %f321;
BB0_16:
// Channel 1 special cases; same structure as channel 0, reusing %p12.
setp.lt.f32 %p27, %f316, 0f00000000;
and.pred %p2, %p27, %p12;
mov.b32 %r49, %f321;
xor.b32 %r50, %r49, -2147483648;
mov.b32 %f214, %r50;
selp.f32 %f323, %f214, %f321, %p2;
setp.eq.f32 %p29, %f316, 0f00000000;
@%p29 bra BB0_19;
bra.uni BB0_17;
BB0_19:
// x == 0.
add.f32 %f217, %f316, %f316;
selp.f32 %f323, %f217, 0f00000000, %p12;
bra.uni BB0_20;
BB0_17:
setp.geu.f32 %p30, %f316, 0f00000000;
@%p30 bra BB0_20;
// x < 0 with a non-integer exponent: NaN.
cvt.rzi.f32.f32 %f216, %f109;
setp.neu.f32 %p31, %f216, 0f3EE8BA2E;
selp.f32 %f323, 0f7FFFFFFF, %f323, %p31;
BB0_20:
// NaN / infinity handling for channel 1.
add.f32 %f218, %f24, 0f3EE8BA2E;
mov.b32 %r51, %f218;
setp.lt.s32 %p33, %r51, 2139095040;
@%p33 bra BB0_25;
setp.gtu.f32 %p34, %f24, 0f7F800000;
@%p34 bra BB0_24;
bra.uni BB0_22;
BB0_24:
add.f32 %f323, %f316, 0f3EE8BA2E;
bra.uni BB0_25;
BB0_22:
setp.neu.f32 %p35, %f24, 0f7F800000;
@%p35 bra BB0_25;
selp.f32 %f323, 0fFF800000, 0f7F800000, %p2;
// ---- Channel 1 finish; channel 2 (%f317) runs the same sequence ------------
BB0_25:
// Constants re-materialized for the channel 2 sequence.
mov.f32 %f314, 0fBF317200;
mov.f32 %f313, 0f00000000;
mov.f32 %f312, 0f35BFBE8E;
mov.f32 %f311, 0f3F317200;
mov.f32 %f310, 0f3DAAAABD;
mov.f32 %f309, 0f3C4CAF63;
mov.f32 %f308, 0f3B18F0FE;
// Channel 1 output: exactly 1.0 when x == 1.0, then clamped to [0, 1] -> %f36.
setp.eq.f32 %p36, %f316, 0f3F800000;
selp.f32 %f221, 0f3F800000, %f323, %p36;
cvt.sat.f32.f32 %f36, %f221;
// Channel 2: |x|, small-input pre-scaling, mantissa / exponent split.
abs.f32 %f37, %f317;
setp.lt.f32 %p37, %f37, 0f00800000;
mul.f32 %f222, %f37, 0f4B800000;
selp.f32 %f223, 0fC3170000, 0fC2FE0000, %p37;
selp.f32 %f224, %f222, %f37, %p37;
mov.b32 %r52, %f224;
and.b32 %r53, %r52, 8388607;
or.b32 %r54, %r53, 1065353216;
mov.b32 %f225, %r54;
shr.u32 %r55, %r52, 23;
cvt.rn.f32.u32 %f226, %r55;
add.f32 %f227, %f223, %f226;
setp.gt.f32 %p38, %f225, 0f3FB504F3;
mul.f32 %f228, %f225, 0f3F000000;
add.f32 %f229, %f227, 0f3F800000;
selp.f32 %f230, %f228, %f225, %p38;
selp.f32 %f231, %f229, %f227, %p38;
add.f32 %f232, %f230, 0fBF800000;
add.f32 %f220, %f230, 0f3F800000;
// inline asm
rcp.approx.ftz.f32 %f219,%f220;
// inline asm
// Channel 2: 2*(m-1)/(m+1), polynomial, and high/low accumulation.
add.f32 %f233, %f232, %f232;
mul.f32 %f234, %f219, %f233;
mul.f32 %f235, %f234, %f234;
fma.rn.f32 %f238, %f308, %f235, %f309;
fma.rn.f32 %f240, %f238, %f235, %f310;
mul.rn.f32 %f241, %f240, %f235;
mul.rn.f32 %f242, %f241, %f234;
sub.f32 %f243, %f232, %f234;
neg.f32 %f244, %f234;
add.f32 %f245, %f243, %f243;
fma.rn.f32 %f246, %f244, %f232, %f245;
mul.rn.f32 %f247, %f219, %f246;
add.f32 %f248, %f242, %f234;
sub.f32 %f249, %f234, %f248;
add.f32 %f250, %f242, %f249;
add.f32 %f251, %f247, %f250;
add.f32 %f252, %f248, %f251;
sub.f32 %f253, %f248, %f252;
add.f32 %f254, %f251, %f253;
mul.rn.f32 %f256, %f231, %f311;
mul.rn.f32 %f258, %f231, %f312;
add.f32 %f259, %f256, %f252;
sub.f32 %f260, %f256, %f259;
add.f32 %f261, %f252, %f260;
add.f32 %f262, %f254, %f261;
add.f32 %f263, %f258, %f262;
add.f32 %f264, %f259, %f263;
sub.f32 %f265, %f259, %f264;
add.f32 %f266, %f263, %f265;
// Channel 2: multiply by the exponent constant (%f109, set in the channel 0 code).
mul.rn.f32 %f268, %f109, %f264;
neg.f32 %f269, %f268;
fma.rn.f32 %f270, %f109, %f264, %f269;
fma.rn.f32 %f271, %f109, %f266, %f270;
fma.rn.f32 %f273, %f313, %f264, %f271;
add.rn.f32 %f274, %f268, %f273;
neg.f32 %f275, %f274;
add.rn.f32 %f276, %f268, %f275;
add.rn.f32 %f277, %f276, %f273;
mov.b32 %r56, %f274;
setp.eq.s32 %p39, %r56, 1118925336;
add.s32 %r57, %r56, -1;
mov.b32 %f278, %r57;
add.f32 %f279, %f277, 0f37000000;
selp.f32 %f280, %f278, %f274, %p39;
selp.f32 %f38, %f279, %f277, %p39;
// Channel 2: exponential via ex2 (%f127 comes from the channel 0 code), clamped
// to 0 below -105.0 and +inf above 105.0.
mul.f32 %f281, %f280, 0f3FB8AA3B;
cvt.rzi.f32.f32 %f282, %f281;
fma.rn.f32 %f284, %f282, %f314, %f280;
fma.rn.f32 %f286, %f282, %f127, %f284;
mul.f32 %f287, %f286, 0f3FB8AA3B;
ex2.approx.ftz.f32 %f288, %f287;
add.f32 %f289, %f282, 0f00000000;
ex2.approx.f32 %f290, %f289;
mul.f32 %f291, %f288, %f290;
setp.lt.f32 %p40, %f280, 0fC2D20000;
selp.f32 %f292, 0f00000000, %f291, %p40;
setp.gt.f32 %p41, %f280, 0f42D20000;
selp.f32 %f324, 0f7F800000, %f292, %p41;
setp.eq.f32 %p42, %f324, 0f7F800000;
@%p42 bra BB0_27;
fma.rn.f32 %f324, %f324, %f38, %f324;
BB0_27:
// Channel 2 special cases; same structure as channel 0, reusing %p12.
setp.lt.f32 %p43, %f317, 0f00000000;
and.pred %p3, %p43, %p12;
mov.b32 %r58, %f324;
xor.b32 %r59, %r58, -2147483648;
mov.b32 %f293, %r59;
selp.f32 %f326, %f293, %f324, %p3;
setp.eq.f32 %p45, %f317, 0f00000000;
@%p45 bra BB0_30;
bra.uni BB0_28;
BB0_30:
// x == 0.
add.f32 %f296, %f317, %f317;
selp.f32 %f326, %f296, 0f00000000, %p12;
bra.uni BB0_31;
BB0_28:
setp.geu.f32 %p46, %f317, 0f00000000;
@%p46 bra BB0_31;
// x < 0 with a non-integer exponent: NaN.
cvt.rzi.f32.f32 %f295, %f109;
setp.neu.f32 %p47, %f295, 0f3EE8BA2E;
selp.f32 %f326, 0f7FFFFFFF, %f326, %p47;
BB0_31:
// NaN / infinity handling for channel 2.
add.f32 %f297, %f37, 0f3EE8BA2E;
mov.b32 %r60, %f297;
setp.lt.s32 %p49, %r60, 2139095040;
@%p49 bra BB0_36;
setp.gtu.f32 %p50, %f37, 0f7F800000;
@%p50 bra BB0_35;
bra.uni BB0_33;
BB0_35:
add.f32 %f326, %f317, 0f3EE8BA2E;
bra.uni BB0_36;
BB0_33:
setp.neu.f32 %p51, %f37, 0f7F800000;
@%p51 bra BB0_36;
selp.f32 %f326, 0fFF800000, 0f7F800000, %p3;
// ---- Channel 2 finish and write-back ---------------------------------------
BB0_36:
// Channel 2 value: exactly 1.0 when x == 1.0 (clamped just before the store).
setp.eq.f32 %p52, %f317, 0f3F800000;
selp.f32 %f298, 0f3F800000, %f326, %p52;
// Fetch the destination element of input_buffer at pixelID (constants 2 and 16).
ld.global.v2.u32 {%r63, %r64}, [pixelID];
cvt.u64.u32 %rd43, %r63;
cvt.u64.u32 %rd44, %r64;
mov.u64 %rd47, input_buffer;
cvta.global.u64 %rd42, %rd47;
mov.u32 %r61, 2;
mov.u32 %r62, 16;
mov.u64 %rd46, 0;
// inline asm
call (%rd41), _rt_buffer_get_64, (%rd42, %r61, %r62, %rd43, %rd44, %rd46, %rd46);
// inline asm
// Clamp channel 2 to [0, 1] and store {ch0, ch1, ch2, 1.0} as one 16-byte vector.
cvt.sat.f32.f32 %f299, %f298;
mov.f32 %f300, 0f3F800000;
st.v4.f32 [%rd41], {%f23, %f36, %f299, %f300};
ret;
}