From 3746079b1a2629eaf89791419b57a66321244666 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 24 Jul 2025 00:50:35 +0200 Subject: [PATCH] Assorted instruction fixes (#423) This fixes transcendentals and some other buggy instructions exposed by `ptx_tests` (abs, neg). Add (slow - hardware limitation) tanh. Only two remaining incorrect instructions are div and sqrt with non-default rounding, but this commit is already bloated enough --- ptx/lib/zluda_ptx_impl.bc | Bin 9812 -> 11096 bytes ptx/lib/zluda_ptx_impl.cpp | 105 +++++- ptx/src/pass/insert_post_saturation.rs | 3 + .../instruction_mode_to_global_mode/mod.rs | 99 ++++-- ptx/src/pass/llvm/emit.rs | 333 +++++++++++++++--- ...eplace_instructions_with_function_calls.rs | 47 ++- ptx/src/test/ll/abs.ll | 34 ++ ptx/src/test/ll/shr.ll | 5 +- ptx/src/test/ll/shr_oob.ll | 31 ++ ptx/src/test/ll/tanh.ll | 31 ++ ptx/src/test/spirv_run/abs.ptx | 22 ++ ptx/src/test/spirv_run/mod.rs | 3 + ptx/src/test/spirv_run/shr_oob.ptx | 21 ++ ptx/src/test/spirv_run/tanh.ptx | 21 ++ ptx_parser/src/ast.rs | 57 +++ ptx_parser/src/lib.rs | 49 ++- 16 files changed, 753 insertions(+), 108 deletions(-) create mode 100644 ptx/src/test/ll/abs.ll create mode 100644 ptx/src/test/ll/shr_oob.ll create mode 100644 ptx/src/test/ll/tanh.ll create mode 100644 ptx/src/test/spirv_run/abs.ptx create mode 100644 ptx/src/test/spirv_run/shr_oob.ptx create mode 100644 ptx/src/test/spirv_run/tanh.ptx diff --git a/ptx/lib/zluda_ptx_impl.bc b/ptx/lib/zluda_ptx_impl.bc index 039e4d181b136715645d836b04f2e7b01edd6f47..64593d36235c6270f84516e76e3244732f2f24dc 100644 GIT binary patch delta 4553 zcmai0eQ*@z8Gqj0y}iBUF4jzV`>Abi1T#gUx z&CUFFe*1fV&+|U-yYKtHUHhk7;xoA2&g~VONO1f1Qu4<3=h_PWw2T`5vUu>-=U`>E ztT~`w0l0gxlIRYz=A&;XCMG7}32xjuR0`fYr(P<&2(9Y1WW39ht9*08+vCpY94OwU zontIn=orcc*O{)Nf^D8&MbNp~P|~Wuri0fY`#geQhSFB`=s43l^*{F~LJ(gn>!G6G z4~rkW1?r1IR>>d7J@b{8vrNMei1hjB3LaDkz>gGwS(d}uQX`FNp|dZ$q6-4hx#~n+|FG_Rt=r)dcWVVtw@qt_QSq?AdBoibdCzb+@rc~uv(Ej5 z{MTCJDlt#>k}OD+hiiO7&NJ@>LKGyxUzM?9dF~A{=mG$lkt9xP_QeMj6-3lfx3cQp zBkuswK~IM2ux=>6a_(ZML^*O9C;oQTKJP1?}pz%CGHtp!`#gzMfgr>K`mp zWXR2|=^w6RV8qg4RmO@hSM3zxru2a0q-NIh`(zbnpoqE^bJdrtl~&MhK#t}6e9i+r zrxTi-bjo`0=+@}42q!P_@;|;%1s_Pj3X|CQ9^L8h1g$>P>ed&sc`m6){+QLFo?!>% zJ8Z%PR@c7mHGD2qWML$mn>!YWg;485inaZ=T+d=i1d(DXPra|)c2UZs!{hFYWzk0U z61O9jXREdR>JeL2Gqr)-zW`?{lye-Qh@ZQ*B1MBJNPV~Gct9LekR^< zv24@UE{W&#Oxcug#W4lQ(4-qZB1T6ZbONkZ0rJ^crzNKP^J?RLu8Qkk6KcHHH|}bl z?&%wx=Q+od;Na{$nUEH1iFfJrwydYLMx#TOME5LZxR-noRh7Pw?@+A1Pmvp>Sg1YA z+U=cVQX<8o)3e{mTRbQn)|a0aN3<_=N%h9Gzsg$;?U?Qk<*cmu z0Wo9zY>X2_JBcfy+Bx| zw5He`o86FNt&+k@F&>g)q}jT;u#AjZ_ZD7b6Sr803d@8Op}Ni1v_cIC=(Nxuu$?l}J>qAg7yW6`Ie);%(#c+)Ew}844+ncg5-hRi6y3r3vaMZ3 z3$I$(uO$rQP#`#D>`(X~#dJyUK$(J39^q&JGznPHV`Yu}HB}bWMM=>*1z~ zk~^g%;BNL8lq;1%Dhs?P*P2(n)HLKSBc2#h6~ePQW5-Lr5SqzSH z>F>*-xIZ@i090BVDCS*s0@bf&X_P%mj`ys?s$%MmsT{)6u!?#`MSt^IsFUFl_7D?1b)U~Ah{fT?V!$&t&t9e;4`jqj)K zk*qWEXQ_NVE7U%G%rSbqR+?JjA*KcUi@3KehU!oPthx1@QiZr(z%c4oU8hTR{c6>#o(uO{@-$Xt$(IA)%9)w zivW1}NBUV2Y`MBbNM4s}*<}SFeY&ty=2`#2Eai86Q)7(A&L0nI6!itAtL`t z$_SD1@(ch6u@fUQo)ub50N4N#QD05$gQA+4_5&ukj@XG2F(6C;rm%&SLe$?w?2#d& zetRkbBI@@Ld)**Tn5hx4 zunQa_CjOiQV0U*!)O&e*%5605#B4Q|^8ghvi5PG_4^Rk@M#i&(8hN`QMAUcj0POmR zi2QyYU^+k=le{?t;H^{uA_jbw3P42OF96I0NF(yP08j>yM&ug>`|CnPo(cdayDzZ^ zhU^6!nI!;}uoF9l7%(8%Ek#8Bcq$$deLfWIsX;{kYr*c^*8-TyIYlFOqhN&~V!)LS zfKq@oqJCvckBIu)Q+hW6cM`y#v&5_oI_O>k@4dF;+%Uzuf z&GV{STkqLCcVqk9=15a(xV?QYQsK?(HZ>U(t6hh7Gk|$?w6Xwjmgnk|1J;z59ZQN AMgRZ+ delta 3264 zcmZWr4QvzF9sj+%^ZD#J$;GjgXPb~SBq5~0E@^?JV~x$%(lo6?_!#Xh(^An z-W)kYTq(4ej=@x6FI6JKoXEKZ6@~Fsp%nOU)^kf}e}jS!MW}5_Exb86kU)B|8i@wa zXXN)i{!b?+CgOMp=>=*Ra1@VIV7BFVb~Q_X*gbwIVn%6ihPEjejbyvLNe?%sTo(eK zrLnfPR}Ia*Ar|~=qDI-Qw`o@m?Ijlpw$ZDG1^`3D&eMea#p-rd#IN^8o#?HOjTT8o zCvSwKCKM*gjs~M;e({q@qyVUlA!)LtE`5e-kaH549@_El*gGH*M6#w4bPlKM<}Gq6 zrT`(rbb=lD(g4(G`1w&f} zb2j0si)Q%kTdRC3+>lI0N-U=?=V=(va1G1p!edW4B#2XUx}TYYfgl&LoGz(r$nZ}% z!}h5){_2hsHbZuBP44pX3~i_xHIki9&>!*`k|l4_1FWk&*Gc|MNu; zHEQrd0ZU9_>+3rh*adyXR97A63Pd5R$PVV}jXfC$N*y*42g|FjZ;4op8aKCOwb`;z zE=QY>Q*7IxDK1=ufm%+nkZ<3j&pr@xBs1=LP zifu;!rNRHecr#Bm7%d+kAbSWg^9AV~PH%Z6rHFziav3|zJ+hyI3{J|-F*z|ZE(1?% zKq-KR>B0cDJ}QieT^-AZ78czTNyU)vRSDT_MN1dYJnKBBuQ$3iMfOzEOfMNpXw;1Q zdXp>mQgxk@j1D=ioEf^_JlYsr}9+ECZmWAD8?H@&yMsHGo zPyG?0mmBD^bgKV(x%Cxw%+a}x?vKiQsZ*P-Bom_r*0a9)vTQC*wck`PXL<=SHLCq% znABTGeP0Qki=H8fX#gvJsT7r$?xj9MO&XE2)^vZFoSmMWoF18+9<;vhk2*`_up+Mx zS#SDJ$T2l(w4b%Q%AO)&>t|)#o%QS^4O*Wo-{WHkwN%Qa6NWNVmeizScDb#~<&{Ma zx?7{$*6Hu5YU~WLE|t&7QD5fNY+5h(MwKL4V;OV?IcBBk=WSxSHAZJhN22XLRvp#3 zNDDk5+8y|;Xgm;Bk423u3mQqyjRY5QkrWF1 zSaj7ZvU66@KU&)xRm_D}Q|PP0Dz|k!)JPhvTOohovibe$+MTqrDz++P@N5c{Rnz)fQwj)moGQ3S zeqn~s7c?f%o7I0R=t{n2&g%E{CVyql9`T$=UNIl~r|05tZ<_ON7V;aj(E~g|fVfxO zZ?Lw>*7$*h>{HXm9v>ioGuaJoU~R`?^B+n;3G?It*fS8wdDU`lh7G1?V+#P+6l5>4 zG6#48Kp_z4lk5lR0PFi@m_%-H06+-HBja6Fz*zv(F^Qc24FI+-PzEX*hEJHNoC$j*MrA?z{kmF^Llsal4kk7XbG7 zapZyT3IO}x!;uFl5$)P@S3iB3#clOMS^>l9DuytO~`VCum N%