From ac6265f257654180f6661c406a025313190448c4 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Fri, 6 Nov 2020 00:56:45 +0100 Subject: [PATCH] Implement instructions bfe, rem, xor --- ptx/lib/notcuda_ptx_impl.cl | 22 ++- ptx/lib/notcuda_ptx_impl.spv | Bin 48348 -> 49396 bytes ptx/src/ast.rs | 19 +-- ptx/src/ptx.lalrpop | 63 +++++--- ptx/src/test/spirv_run/bfe.ptx | 23 +++ ptx/src/test/spirv_run/bfe.spvtxt | 70 +++++++++ ptx/src/test/spirv_run/mod.rs | 14 ++ ptx/src/test/spirv_run/rem.ptx | 23 +++ ptx/src/test/spirv_run/rem.spvtxt | 55 +++++++ ptx/src/test/spirv_run/xor.ptx | 23 +++ ptx/src/test/spirv_run/xor.spvtxt | 55 +++++++ ptx/src/translate.rs | 249 +++++++++++++++++++++++++++++- 12 files changed, 576 insertions(+), 40 deletions(-) create mode 100644 ptx/src/test/spirv_run/bfe.ptx create mode 100644 ptx/src/test/spirv_run/bfe.spvtxt create mode 100644 ptx/src/test/spirv_run/rem.ptx create mode 100644 ptx/src/test/spirv_run/rem.spvtxt create mode 100644 ptx/src/test/spirv_run/xor.ptx create mode 100644 ptx/src/test/spirv_run/xor.spvtxt diff --git a/ptx/lib/notcuda_ptx_impl.cl b/ptx/lib/notcuda_ptx_impl.cl index a0d487b..4249f2b 100644 --- a/ptx/lib/notcuda_ptx_impl.cl +++ b/ptx/lib/notcuda_ptx_impl.cl @@ -1,5 +1,5 @@ // Every time this file changes it must te rebuilt: -// ocloc -file notcuda_ptx_impl.cl -64 -options "-cl-std=CL2.0" -out_dir . -device kbl -output_no_suffix -spv_only +// ocloc -file notcuda_ptx_impl.cl -64 -options "-cl-std=CL2.0 -Dcl_intel_bit_instructions" -out_dir . -device kbl -output_no_suffix -spv_only // Additionally you should strip names: // spirv-opt --strip-debug notcuda_ptx_impl.spv -o notcuda_ptx_impl.spv @@ -119,3 +119,23 @@ atomic_dec(atom_relaxed_sys_shared_dec, memory_order_relaxed, memory_order_relax atomic_dec(atom_acquire_sys_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __local); atomic_dec(atom_release_sys_shared_dec, memory_order_release, memory_order_acquire, memory_scope_device, __local); atomic_dec(atom_acq_rel_sys_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local); + +uint FUNC(bfe_u32)(uint base, uint pos, uint len) +{ + return intel_ubfe(base, pos, len); +} + +ulong FUNC(bfe_u64)(ulong base, uint pos, uint len) +{ + return intel_ubfe(base, pos, len); +} + +int FUNC(bfe_s32)(int base, uint pos, uint len) +{ + return intel_sbfe(base, pos, len); +} + +long FUNC(bfe_s64)(long base, uint pos, uint len) +{ + return intel_sbfe(base, pos, len); +} \ No newline at end of file diff --git a/ptx/lib/notcuda_ptx_impl.spv b/ptx/lib/notcuda_ptx_impl.spv index 36f37bbc53f9e204d5376588900fed05e6608728..1ef470fcf0077084439be6d03e5ec33f1ad15805 100644 GIT binary patch literal 49396 zcmb821(c=5(T3|f?(XjH?(XieEU>`hu(-RsySux)ySqz5h>$=Ml0Xvp&r|chY}1+i zJK>x?)m2^fR(E|5yLa#G+-WtZ?L{*&#A1D$_SU-tiepwEB1-S+D5{(l~5|DwL^fAm1_Pklf4zjvU|zo;+!Ups*N4;egc z$baMi;Q`#gt}pwqHh}y8T;I?Aj~u}L>-w_)sRQ6Y>kEF%|Nl=O0RMG;*?+!)&cEQd z?7!YX=U>#9{U;mf{R@7}{xb~p{zZM+|IvXy|AODL{~reW{EPas{|O_S|KP26-)ra| z|CRqMM>PL_>dXFfj_CeneLwu~FrxeSQ(yKUHlpY6`v1cJo+Enx{nVHJCmiVf3x3Pz zzsNx6U(}cVe;R22g5R?LXal`}QD64IVxZ5z;J57m`aqw5QD63dY5@1=HPCZ;L*?;YU&cEQdeEz!(bpAzs z*?-}I-oN0t?7!4N?_boH{XZJ$^Dp=<`*#O=|Dt|)|FPro5uLw${lDFiU54y2bnBlF zkbl1#J0|kqi~9LH&o5p>^LGeq_z=>#y!}7Cj<$veA&uV+^c=~8?oyp&o`Iet=h^=} zjvnYaN{#9qKMdfx41ZnM|2ftfz&ZXyqdLd?1DMCpukZfLIVKy??@@aU`A?1N96Jp3 z9OX8WDRoB2I2ZT-%6vyb9a-Huqi9kFpcVy||@Y3+!c z+7ZvSBYxeEXmlbb??f!piP*3cv1cdZv`)m$oro7Y5npv8I^BqAx)DouBR1ayXCoriyk6Y=3(B+-1CI=KHPWt--+RB#Ij-5aN1WKq z(MrAfBVvK*WQ_N8`H#CTsP~h3KiX^FllGc(E)0*{ww>AS>%sZtj8{D>A`!|{8#oJx5yu_SdU-S(wEm^{KvE4oIHz(i~LC<;vdnB zNh4xWJsKhgK~g0dEk@1mDym@^{( ze?gyobjjuaL1u*APn^%CK8x%BySXDW_GOHmTD+srT=@Tu{|EVnK6y(-D|5-YUU%l5 zdVKb;Qfz^UjKeQ;z-N9_>pWQBd%^17es&8f-e|OP%`^Hjnwf95nLn>T_^;$-k%E&& z)pN0e6Mp|LIayA>r^5;Bl9S~lQqRvCPN*mD7_(kneXzdU3et6+Iax6x_c$B#!CxXG zmXro-H2FP~=SSR{D^;52WaWt5I>_>U3w`q7gnDw!$r|$6Hz#XGWZazKGrwhY9<1-ZR&{T4vbN%I!rGjWUvk3VA8Jn4 zEjU?EJ=ZTd;m`Pzlg;(#KAf;FIoTp2_57^ignHtRG3yP~2kW~HlCJa2$(DRAFe1+e zC-@si#75F!jpoMkiCc4UrD;yKipV{4;##aZ;VWAFe~&)-=+b8s^}(8xZPdq{42j5? zIoVcoff4@9#-dLioKR1WIoV!5`{rb5M8?etKJ(jD=fV2ke65tZdzR*8N5$cUwK*Zb zDFH77e4oa~~WyB3`AIM$!Yrg1DvofIXOKd_57^ignHtRG3(>i2kX0?AzkO0 zlQSc7kF&uE{s|FrqBK~ed6InM);z1yG$&_AiB@lZzFH6V~R0 z{E`#i52!i0wBY12^}M{`guhRgoLsNJW5NmRl9L-EQqRvCPN*mD7_+`YeXzdUjnZ|V zIk_n!_c$Az;9r^Z>w7)I8qKTaUsY({Txpt2rSIoBkI0xgxkGcIAHSFI3w`q7gnDw!$({1qHz#*RWZazKGrwzf9;|11cXe-ba*yJ0 z!rGjWUvk3xc{L~Z6`b6!o(~k9@Sa1-$rE}X0!~<$oIDwkdVbb$LOpTEnDv9|gZ15> zlCJa2$iJp23H8JsW7cn}57u}4 zSh~(LC!a**9%q9S{I?_G9ci#e^IiGGt@&xCX-+az?{xeYdfs>pXKZHt(JDK5L_o6a0}H5hF{3HJYQyCvMGgDot}TZX%^gUDb$w!wyqpAw>gqwb)Gqyg|9s|BG1O}C-_r0BBqfBYc!{oPu!Zb zR+{EywnpThIdLu4oXjpx&?g^V`b?)jSaULm`k0eBscrD*MBn?db7?O0^Xn7z$%7Me z$uTE$%V*!5%+rXBn-hHIH@(h-^}XkPFRWMqXtQS@vtnaprbe(5TmTg4taW*)?Uxf2XgEg9q z$tP~j~06KE~H~`1;t| z1t;sM=eh+ad@Z%)WK(^86;4=}oNOjNKWjLlp15PodOh{Q`fi&`*Lmh-i$>%gXM+>` z^&1fzNP{(+8_Fkc%|Vr>IoYxixo1vXi!~>M#R>Z4qf4KS)CX%$w$i=rZ>_%OW}8N2 z%-jrVMAqKuot-0B;;ASWF+_~U}ufLbv?4_^C!wu__o4p&6dVcnBLp^cFnEfv5gZ16^k*@R1 z&AyGuJ#hRP5#0~o7 zqf4I?)dy=%&epx{pQFC!=G;bP%-o#Uh^&DZzn-BVd2mBLIp*ek`RtpU3mTDebA!)3 zPty6Yp2LOJ9L&u{io*?Sb3=Z~4gV&&=H`-un@iR6vVt4_olnWlb^5nIaKpOf=K4mY zo}WG3P*2=3W`DW*V12h6r0YC$b7LcNkF&uI{uPagE2Y62&8y@Sx8_Zirn$Mf5xHk> zT#Gd~w}>0`$w!wySE~=!oZPB=+rLeH&CTtN$e6jgqY+sHH-3FXKl0#)dUDLoo$}c? zH+MB6=4JWpo10e}k#Tc_&paQ~`LLeDtJNIL&1;Io4Qq2le#s5r15$JIM#0US>iJf| z4ga37p1Jw75xK|N;0FJlM#Q_) zV2$Q`@`+pXvr5z4eBOxMGdHfqnwu}g4f^DxOP}}E2Ww7#rF+}|wfdTyFB_3DbMu=< zWDWiO8i#)5!437~n47QUvu|#G+lY*t8+_*ZfzF5Z9DY~L!QA{_akyb^ZpbgW;d|$5 zZoV$K`J;ONso;ixgIaR)cm2CmxM5v#^N&WPo}WG3P*2=3X8(=)V12jmrRzL%^8?R| z_cp~1{YKTCr(ntzc`+?xNaG|kPw8j*YE#fAN{;cRC-|b7(jF{w=X* zYi>GWTZz%zhO0!TN6FO4oVjX1r$P9%tkC8~jn55u-_iHJYQ#CvMI0D@}7VK{ImC z+_)BNZYFH@{TYHj`RLMT4E4d9lZkY1`xC3LxtXLH88bJNHY023@7FisSnn7n_arjGdFWIBlkEP+~7~&jF>?htkIlN zK5=W#S!tS^xtfuC=Ek*Hb2GQNL7#kd=`)l1V9m)qy0`s#)z{q2*NlvroB5lOHT3uE z8~Tw4H`J43ZWfTwzPVYj8GUhm-3_04&aCrcJ%@#=IhdP;6^9$v=7#)|8@|W4=4R1? zo5j>~@q!z^KceJjd410W+^{aWS%JEO5xAkAxOrgqOQ;XlcUw`q&NDYFH6!;p8{FV8 z*^F398m!S=T0U`Wu3TxFn^l^Td*;TqSaY+gxIv$Mbm_B<`e4n;YPz@m)z#PBtkH~& znVU76ku~)9>l^x!2RGD{V{X=x&%U`?yBQfbH~7qRS)C8-IjmF7!Q8B?INY!{H{_Sx z@Yx78H|rPNY@nVS7ToYXH6=HL^nEsP!@A^V%VwmWpFP}APuwwPzmfW2eYe5Vb)LD| zsu{V*+296$<7UJr(qN6|rt*nfbL&de+-%c~+%q?>#hRNT;s$;4(WTF3>Vq{W+v?u- zw^Ltpvwbr%W^RTyBWvjI*EjSd4{oR@$K327pM7()V>2>tZt$7s<~kqNbJ(exgSpvR zakyb^ZpbgW;d`oUZgwrW*-brnFSz0Ruu5+B)Awe<4eOGd{hN_`e)e!fJ#ojF{T}Lr z_1zATuJg>zfz8N0&IUL5!`Z7GqQ&Metknf^5BMga?H(<^4T{xM>Qkk<_4d6?xXWz zJ%^*KIhdPc6o(tu=7#)|8$Jt)|M=c4zCOkGW}Q%QbE0~lRB*%h?v>n}rSIp18`dQ^ zXG_n|9&V^7?ijN_S$(j++d0y8p1C=<8M(*V;0FJcX2hw|V2$Q!@`+pXyh_vDoZpPx zGdHfqnwty64f^DxOP|x#2Ww6))V=Lrq`v0n;$~#b++5O(tl`fnzrLX#d2mBLIp*e4 z`RtpU%bJmKbA!)3&(Qg>p2OwU9L&uXio*?Sb3=Z~&6WLdb5+63)#`ao!42Q~zGh^NU)x~mM;_cz zPmZ~{Uq1Wh=7DBp+}z+Z&zp2Utmp7xH3xI^km7K|+T4&|a>HkD)xY07QgHLAdOlWg z!}kc6+&ri68-^R!B{$DEBlZ03;f8wRjxqbk)d%aly&zrZnVT1zk$apCZt$OIMm#AE z)@VK@pSU$&sx-~b%gx9=bK_d9xp_t0pie%!^m$r+u;%1d-P`_a>T7OZZ$`$<%^S_g z8hG*R8~Tw4H`J43Zr+s7zPWj;85uV>_{{Seoe%3dyj{(~+`OYW+^{w`W5e^Y6io3EOYd*;TqSab7Raf3ej=+fs?^}(8x-|61=f3Lpg z<`2!tn7R478Ce52etknf^5BMga?H&i<+E>Y{?v?&n;U%Q`I*j#^&Gyb=3s8VRUB?u zn;Y^=ZuqRjnw!5A-27ENzbm-mvj|FV{;kg^fE(5&H$OHb_5AGNhI-U-^hKKba< z=Lhw{nv-^`@80%1>T7Pgd{#qi`1{4ZR%8v_`t=R{$b%bl$uT!0$!Fi(jNFQhn;U%Q z`A?k>>p6^4&B5G^syN)RHaFy#-0&G`H8-QThQD7t2J?w#jM*Cgelee4QF1e3tM4-{ zcqpt(ZYF9)>iIdr4fVtwWALeb2GWPL7#kd=`)`CV9m)Cy0`r))z{oi)rySy_nWC(ku~)9YaIHK z2RGD{V{WFA&%U{twiOvSH~7qRe4P*LIZRj0!Q4!*INY!{H{_Sx@Y#|zH!~L8%%q+( z7u@ifCnYy?>vK@xhIPr!JgrDQKYO^Lp15Poeirq?`fl?|*Lmh&0SxEP`zp(n6n?+iYF>|vh z&jrom^$q>VgB$9}F*l3JXW!f`-inNy8+_(Dr_P7<9G0l&U~ZOF9Bx>f8}dtT_)NN* zo23hGmQl}T3vT$FoRXVW_1QUa!@A^VwN|8_pFP}APuwwPznuDDeYe%6>pXL_Mk{iU zv%wAi@~wy!q`?}^73CAR=9-nJxml|fxo2)%i#0cEiyQRGN0&Y;sSnnitfPC|Usrw2 z&3diKn7LWM6^u;wrxe~`Pst_^~4=x_FJeA)_2=Z zy3R8<+qWY3I2+vH4{AkhDGk|wPE3$_Eetknf^5BMga?H(c^4T{xySF0a<_4d6ZmsiSJ%>H2 zIhdPaio*?Sb3=Z~4WEHqbF){$&ED#{Pr(hJ$5nE3h(4nWZdjMx9NLQ1^RtH=>WMqX z?Dth4tnYT1be(5z4sS*7aW=TY->((1zcg5*d4PQ4);yxpG&e`KBKORVYq93$C~<>6 z`RLN;K=r|zlcRNS`^TuSxjD8K88bJ>wIXZi@7Fi>{xH(llPb;|LGu%pU&eP|*!42z@oAXR^*HB4g&}idJL|{r&z8{m6qG>d7%TSITGK z++5X)jGG&L=6R0JhxHt;uI6BFu2CFrSeqO2OK$kg;hLN43U01f&l?JE_}ssen>+Mb zfN;aQ=3)8lo0~^kk#Tc_&pdC_`LLeDqtzVD&0~tg4Qq2le#s4=bk zK2>nTXGNCWysXcUgd5f+H?OoJ_5AGNhI-fq8mu|j^YV#X^UX^0aDB$!Tdl}Fv*%i@*?YSc z*%N*8(WTD|>Vq|F@2HPid$$!CGi&c@w*0x~*F@-(2W!-mjn4_LS^K15?NjyqtYD4LBrRF{oj#`&)>xOU{k|2c zXI@~9dg6{T>(A8(>%09yy3X^=zHUYCnKk@hwIaTd2G$yXtTfEppIVW7+z;09(fGAA zSaYr~` zH53+o@?eczay+-c$!FiJ{hjv{dEY|+-{SKOzS4QHzV|<>dz-cI6^Av}W{v!kH9jl6 zX6>H^YyVQue;2Ir*|#NY-FDyS;ldj0lC@qtQqR1=TGqFJ$=Z+VgZ14;lCEn#vyt18 zdu9!vrawsoYmHGV4YM|CJ93Zv!5TgqjdtJrsjOk3DWAACN2@f=+UV`bJ+tOotXUhQ z9oZ9o^3kPFOMS3rZA|qsYh$$|WBz?@Y|R$N{MrkB@?ed6ay++j zpsn*@eedzAdz-cK6^Av}W{v!kH9lv%W^Kau@b|JOQqPIo!{5v1GnPx%rqSmv!y0QO zeZFbik$UC@)~F}$7_**4eXy>bPP)$X%%*Qg?wK|G$=VT2hGG^9h z*KA?Uuh-Bg57wwB$8(!QKKo{E&UR$ntl{$vrqX$^zV}?!z0KO(io+UfvqpZ&8b6Dn zW^LYrwfWR@{(?0=%e!Q43C@E~qm}EDwI$n;dgcYz$R+L=vtB@bu)f<;(siC^wsbpk z&#d7u#F?dmwZ<})hFM#-9l6K-U=1IQg{8rob1fpDxHXrnG|k%b?Z`c|=31;-TcI7< z6Mgd0rO%@3gEea_s*hP)sT~!hJI1WnQ6H@DwwZLD=b3HZj@&bA`0KSJ)|Cd<8e3EvW^GVAa*zAL8a^89OM^A% z+CV;WYi?O-nzh00$UU>>TC7>ysvX%Aee%(z&xY!QHEUa|k6GKM9T_ugLo{3Z`86B* zBo%!A3d{*7x4Ny0=*ysyM8%Hf!XUtnsrX{ci11n9lc2GNV&#bu?Yt{~KNA^UYe01qE zOntCs?GW`bYlpTYV`l9z&6a+Cy@oz{utq&Op4;K_**9xPv?JqY4WDPQr_O`*y^pN! zZPt!b9M)KyHS$Z=_&FdoYsVC<9jl(l6|C_yFiO@=*U!a(HP$6-XS5^r%nPhhPuwwP zeZ2Z$eYZ2E>paiwtajv{S;IfE9dUv*u+})c(lBf1v?KSpAFSb{agsDxbFP!+6SwBM zm8MxcuN}E()?AAV)>xZ0@=MnE8D2GOR}`#Wsh(FAtnsr} zO4e@H&tHKx)+KAVv?KM*3#?I3+%aZ-wfbOvw_By_JkRX5cI2K}!@ss2ag8*v*0{aW zFl%?TBlox;tl^_^oitc;uIuF!x8|LdrdhkI9l2-LT#Gepcef*ZqE9}$^tnNOux9NZ z^)YMrwj*O^?LN(xe$0Yj=#vL))RW`6-7lYgv-UtcGH%xJc?LJ?JXqiR!Rp>-?IFcs zjkQ@LzhsS{e^s;gNWt2p>iJl~8b8ygWbJwVoF7fUDUJ;h;-wOJ#-WR0JJSF`p(!Pq?MwYU zDOh7&vi6&Hq@H^XoPA$%8fO$?@F&ET4U|_Lp{K+^pgA41T5aV14huR`)h*-zg4jtj!wv zC2RcrxSF-U7p(n5J-;tl<7bqWtTj7*Ker6lSeLA|I+1$j1=gr1?ijQFL4B~kTU)x$ z^UON*==A-W0VnwX;(bkNV6D-uG|XDB6S>Fz(7;FI-_l^sxqg&S+?pd*nr3a}PUN0h zb1l}ajnawii9Y%0(&s1j!J4&E)yJ%j)`^Unwb3F%F*2dw7p;ezJIc{h8d%5GO=lGrB@8$Bd z@JiOE(9g$%HP$6-Q+6Wt%nPhhPuwwPJ%RdQeYdHk>pahF>Q3aIe_z9&s1q@vG_cl~ zrqVEL({>{FxF4+HqcO2GSaYsP7nBI9NcpJy_e&WH6J zX0PU8_U2F=_E?)e@=Nykx$iZ5a~15(t)BA~?C~=gOZFDk&t-%?)+Kw3bt3i55A0D- z+%aZ9ulitpx5cIFJkM+ia*E&Q@aOMD%qI=3HI}S2%-&L+$UW`{d-!N9APv@>YeD(M zt+{liY4(=sMDCeA*J91yvYp7D=#!5weHKz5tXW%5v$Mav`kK8JI*~E6w_+!<#;@hD z^dk@Us3*sBTuDCrW^d(AWZdlG^Gp`j`LLeDD%Bj!-l~ej9&58le#zcy{jj%s!QL9` zxn{v0Kl`&}ZzKIYP}pN#vbS+3QqTOr9`(c>WA?TCCaIvJ=@8ee%(z z&-&_vHEV-4JNsLyui4wW6B#pm+jJspV9u}a=tmyxQBRKNI7B}CW^db0WZdlG^Gr6- z`LLeDcGVor-u8;a9&58le#zd@e%RZgU~fnD+^JxXpE+Byx0iknE$p!_+1tAlsb_v* zk9y*cG5ej>2kX1-BVFftX8U#`_skytuAPWoq=B`@ewBvV+n;CLiM$`|;iIveG+1-4 z-Q^Rv<^h$a**mZkxo7rVi#2-(sVDm6qf4JX)CX(U4%Y1KAELfy@6b+U%gdJaccb1-{HDGqzA%^vwBdq?-f-Z2Gx z$ExRX1$+Fga zM4TWEtToQAG|b*Poya}z2YdKvoFom_oaqPFEJ=bE*-ua!#p6HX0 zE`3f>AFNrsK(n)dq57J=i#m}pvv+YPvWEW5hF|m}5B8`h$8)?yKKo|x(oSUD?BVlF zPSyFap2KC;9L(P3io+gjvqyf(-WC0@cV)reRqA4&$U>ycMs25y~szGJ~yfl)~wyD+1bBOea+teoyeHkd!Q3p zLw~>KqaS&&M?E>7fi8mu|jv+{{s^Yu#8?7h*6+%tQw#hSf0JCQxn zCm&tj*!x61KP}k%s$%cAoyZvMu`b#BT_;k{ z{J@CvMF@Rhnk+n@;4O*>f${?0wsb?1?`4=+fs)^}(98KWldO|DwKT@2{Q6nA!WT z6Inxlzuu!Cd9X)4IiBO+~D<$!~N%tmp8LY7S=ad&Oaowb>)TWbcQ5 z*!yR}-oMoI-vxV}?k|7e>vkh!u*bS&uh)&#Ge5AG_1#~x_oMn?eYcUM>srrj$(siC^HbXaZ&+Oq(-i??{8dz)0SZSEOnYxjC z+z=1SA-&C-qBGkdPZn!Q=Okv-8TA6@!Pr9N1*Hk)Q=e|GgX zdvkOnV`gv8Ze$Jp{d$jn~D<$<#U@)^nJrnuFP!S8>>5 zZT847*_*E)_U13xTR=S*EZAGJVsEK#WDNFLm+URwjnp$gutz;{$C&*>>Vx&&mXWUW zJhNrHk$Ywjf01s)!qUK6W4TJh>@DAo+~a<*hmXdh(qPTG7L!lhnk!VAW^cuA0!f&hyN+>_+aHJ^T&35gSMYYmLE`hS}Sy8@b2* zU=JUSjikYvb8Rf2xHY$~G|k>N-N-$&=US}U8`6#Ji9Y%0(q|L(!J4&gH9Py;sju1F zz8e`cdqcaCHT3uEJ^GOcd(@NTIqo2zeY3Y?H!^Pa@OdVi>U>zwVW(;iW^ZT3VUM-h zBfn&C7qM4<{_3s;d%LOU?ge}MRqXB0ywGX1a$T}_KsQp){JPGIFJ^Vep5yPZ`wZ_4fhS@u$8@b2*U=JUSy`;gKbL}mkxHS*0G|k>&-N-$& z=US}UJG>j&6Mgd0rO!Uwf0V2^s@jxqaF)CcRkohM!Ad1mK#BlpZ6{%PHaQ>B5m#s!sz*}JeC zxySur4?TCCZ-q#M~2ee%(z&zb6jHEWk@cJ?n* zU$b|4H!^1SuINVA__ZFEe&oR(_2hVtSITGK>|NE3jGH}tp2=A{AJ%iYx|)O8yGC)? zV{P`xFWI}cANH;**t=dmZz$NiqhjyQZe$GhSeNYG)s56MKd?tVamSebjp~E--R_pI z^E|VAx{-Tk5C7(F#7)w`TI1eI!|dJHjojmYu!oPvEz)4kxo(wD+?w}Snr82TZseZX zb1l~FJ;<|HFY?i)&u!|1HER!PcJ?1uU$gf}H!^1S9_>cfz?@(2(T_aXqn;ek@iF=A zo4v=ok#Vz!&ojAQ=fipqPgHX-drvA3d#ueK`6YW#^~2uN1$)n^=d%TSuU726){Tt8 z9_y05*SnE=<_GquC+--te@=a{zS|qpb)IMTW;b%r?BT!Ajd)%fSZlmhX_&pYyODd` z5BBiUcu^XxIoC__iCgoXO4IDU+l|~ad#=Trz4yA2J<%s0UHZJNK3KE%zGi3t1NAj~ zA9f>SX78hJWDV^3^&b7mgFWiW@f<&v&%W9Fq#GGGd-yz)S9CtC=kRGY2ebE?;;_fs z?2%ux_jy0;eNnLYEA{+!!QR&udw=Xk#$b<_;aOU_Wsh1+~a<*hmXc@rNNqW{Z2k{YyP#;G<)B5Blpan zYq4hUZ{5hA=#!5weSWV#ShMzb&CdQm)Yt5NPfd6DXWIVIjjW+Rv*8#0$b&s{$?+Wj zDW83__b=X4)TWbdbb*y9Uk{M-kgd860& zUTd>A{QbRAdcS;cdDLEH4E9);?2X13-+IG2gFWhrn<-}BQXj1EHoA13=b4Ssi`+AN z_#OI618a>jD-E+ZRxfgo`@tSQ8eM6y=3G7b#H~4YrD^uY=|%3DJ=bE*-nhNKnW9fV zy7U=IeXwS2Jk8Gj`08u+Cg?@R{QKU7y~rB+`!yf^$b&uV$?+T~lFz=`o46MlH+%Rz zlaX~ktmiOEH3zddsp7E5+U${EvNsv`tA5VNj!M()&Do3GGkdPZn!UMtkv-8TA6@!PuRd6_Hn(PHe;)NUd-L`pV`gu@ zUStjZ{d$jn~D<$qYIl)^k{}nuFO}NO9O>ZT847*;}|D z_7*AFTU0$4E7)7EVsH6gWDNFLm+Y<3i_|keutz;{$C&-%>Vx&&R+O&uJhPR0k$Ywj ze@W&m4Xia*t~AWvD!s@(?gxALXe=cS)|_i;`NXZcYNcuRR_jIXnLXEH&ED$FQN74V zmp;p=57w-$q1oAAQ+>_eTD{1a*;~68Swnxn-lHFRutz;Pp5r?5**AOZ_9EkE51(hU ztj>q^9M-GmVD{Em9QIh7J@QNTHt2`F4GZ=*QqPSG_6Aq%ZPkm6!5-_9y{&tZdgcfA zs3-0iv)@F0u)fqTrT4Xib`tu)NucD=|w?gxALXlyPG)|_h# z`NXZceWhvkhV~-&%${qpW^adHWKZ)CX(UcGT?b@1(wFZ|7cQ%#qP zv*8#0$b&uV$?+U_mCwG}+pQNFH+%RzlPz^Vtmm+MH3ze|hvKlu+U${EvNxVx&&4wJ6)JhQ`lk$Ywjf4^SD zzS6*2>b&Q+~a<*hmXeo(qPTG4vW2_9A1j$GT+iqF$t)`GGy^ zi95#Z&r~0*?{=|to#&Zd(u>?Pd-!MfBF>Tq)*6>q8fNdZUgRG4gFSpS&XERd&ULPQ z;?}&p(lmQl^dk4no@=pY@5)|ePxQ%0mp5|Uc|!DOD0~CHZ|mu^J6qK z?mi<$#H;EvvicBvtkTuZ}Inb)sRaLeTcgcjK1B^ecmaw-!0bfsn7ex yeLm<#j1iGFd=hNaopYa|`MAnc?4tb1oRjU>Z~Q!Q z)s|Xn>8M-(=h?e=ckZ;BllLMTF;aBn&R*nyqee5@5otTJUD>H*r;do>qZ5sYSb4`G z+b_PtOuOv5S;R=uie^OqS~}WsM=xTAh}2s)n&~ecqs02%h-o8Y(4g&i*mcw0HXA%> z$6favwAFSyZaZku;9YmvZqUv{wjI3Zkj(~dy6fOUyKFgl=OLR7+HA+k;m9BBWdzU;rtK<`g|Kli_EpwGXkFZ-W1fcp;_yvvZ^ z_6#1=U?z!e*IZ= zpz|;4%l^X-^!^3EW&hC!djF!n?0?-rpMSw`+5c|?ef~v#*?-SJn*ZQUciL^Moqw1A zlm2M_{nVHJC-|fLm-YSdzxE&9zn}WD|Hgmx{9XS${15)4=ig6#*}px|`4{|_&wuKH z&cCQH`+q#p{sq5f{~re0zo;+!pE%IxU+`P@zi*(=zo;+!-#mc(^BU;)zWzTlfcw|= zW&gDXaR2|*_rw1&1Gs-(U-mzH0Q_hD@9=-x0Qj%#%l-=ubp8dutk9HmBejt>U# zT!y}`>;D`J58xdCN25B&V*{ASe_r4H&N+ttBSv=~@_#j|bF4kkbCfgn|0rVYvVqQ{ z)Tqw!)E_+;|9;P(mw#s-{Mr5g{r{yujp`iN{LwtD@&Ek)^7%l|QEF7@n17(>C}&zd zmtzNdj#8sKM{5A*$lr(lpXajf0M7A$G^%sFGthICGp*(^*&qG?GI-OWzo(k9>KwZa z^c>|(t8?5t(0P;^#W|LXAM#K$`h$k8m?=&KQX+(_OjF`0(TA;(}JhpIZ^Hwj#c4MU2#rn64eMR6AmmcEtYeh_l-f zx3?o+Y)5?Bju@^JF?A1$<-H1cF z5f^qN?&(In){XeC8!<*NV%A>7O1+3}dJ#wVA};Gij2Z2GpKg+TSFO(F)&h;b8se_qt=ae9qjPp|_U#cTHf^+0Z?cG(D>@nDJx~7QJ)Mmx_tdPK&e(aJSi3r08V$B0Nhw1_Vhoy;Zf*bmW+Vburg z+A*c;yz8Tr=N~(tU*8#ijCT6N<$JV!XJw7%2=a$7G{>nl57ZpSi^x5me_4yhH_^*8 zOc0Sgpie%!NNk$QgCa6&zC$C&kW>Vx&&W|yw>%*h-PxyRX<5B~Jg z_Z~e=vqp19Y4YB^HRr4}&B6>v^J2KDzXoNqw;9WFGY~C-X*R%$&@p zxxk5kp-P*09InO{Ep=463F(+%vXWyKx6_Ig%e_#&y%x@W;2kU#UUESN9tfM%bur?>;mz?lA zu;ygFf|K>tbAy5tUe}bIY^K*baKgIeWJpBn`B}pW^~4=x)*Gr1)_2=ny3R8vTSVj@ zXJbD28}avg(qN6|#`1|bZNt39o%iPWIR9VK`x3a&kaK>iJp23H8JsW7d1957u`(P`b`DCkI93 z9%q9S{5|s-^}Rk}jpp9+_bN0Gt~AZbArZM}PF#yMCx?m?^vOq;KKrN-)|?!sKIY`` zh>V$&BQzKK`Sl6<2j*7^*Il*Us`|3Pc&+_Q%-sa>O#o>gtIU&E~ zgg@WboE%qha=dz;P;kPZk4jF?(4U#$gmuZunGvbyXALLR6L*YRpQt`q-|Z~vI?tS( z9g%yS4NmY+iine?!5Ym|tc6V~R0 z{E`#ipQ<^zyx`;t^}Mp+gg--=TvMDCdr*J91d?cxM|^3kQwb?SpPCwHii zIk__;W9H;fnhX8>`UHLQ;DmZ|%*mhSvu{rBipaP*!DoKg>pWQB`|j%A=Hwp5;e@q0 zA;08=_ho8M?khOCUp*fvIN^Pkl9MO&-V2N;e>kPjxp;8)d%alJtbY| znUkj@a*wmY3I0P7@vt;lqxpz@;?{ho(ljT}M&zD3aV^%IJSR@jCm&tSbiAX&^YdE2vxMR%v@9Kl~-QJb1 z^UTS65xK|N-~|8mhwAA%-P@derZ}9i zHYenlobdMmH78#boP4RCUlpA2er?IgPx-ufgR+(Dl9Qh!QqRvCPN*mD7_@GVtL&K**J8~{qtW+F(I+2W`h2fG zSaZ^Ba5nn(y}#Sy{m=$1_Px*6ZuHHCetvy|K6!9LE;;6;BcFY9(&fF^#?bdq@R{Ea zIuF+O?&&<9r8yZ!aX4XZPRK7g;qUSIkA1%%J6vPv`)tEE`u+?tLSyLrY`mvmaxzAv z?|ptaVO?@EW+PJ1=YtdKi95!uM^qoI?>3fnoo7zQZba^JHaNi_sSz=N#b>34iZX zax$a-E(lIomz>Phh}845h7;vvo#|3%!zBU=45tpf&aZ23;q211by=0 zgnDw!$=veUHz)J(oQkg}_{?v5od@fC&s*KwoXn>ZAZ@2l6zc1qJ34dR- zP{GN<>bXe434b?Oa?qhBXZB2xE5^92%{m1)>#FB^1vmVCa>>mm`a5R0VO?^wX(Lk4&mL~5C+--tUtfK&zT0Nf zb)LBy(umySY;c3WK_g;AX|P6fBl*Owxp}2&ZnkJd?wK3cV$IE#;s$;4(WTEI^}(8x zt#ohuTdS|R*`^U0GdJ5dB5VBG1xr8j;D&l~%*}T4**7=aHzMQa2A_Futn*<#haIXp zn429Hha1-BhWwHn{;hn?&CUfkyQt@`1vmT~gp!+m_3skkhIPr!evL>yKYO^Lp15Po zemC{O`fmG6*LmjVfJWpVXM-F3-5U{mNP{(+d&(zn%>ye`ZxBeDiw{Cb9dD z+*{|vdJacbb1*kYD-JiT%?UZU21rH8&SDB4g&}!bW5b-1zkk{m6qG>d7%T7s+Se z++5s#?ESeqO2OK$kyq?(({3vRAZ&npXV__t0aH#h3v zKfw*_lAD_vk$Qgia6>(D$C&+9>Vx&&ZkDd|%*`#0$UV*mH~3dKBCe4JYc#KwPu!Zf zR+{GKwnpThxp6Jl+}tj1&?g^V`dp_zSaWiR?rr~0^))wtYDC7&&7T{QHT3uE8~Tw4 zH`J43ZtjxLzPY)(5g9i(_{{Tqoe%3d+*8fL+}x`;+^{w`vF z-0<(?N^YLiznOy@)+IMjH6r!=?BRxb;*K%&J#*t)thsq!+@McBy7YNWeX!=_1>M{Ji|T7`UTQ?f%+1S< z$Qt_lH4gpAgB$9}F*mQsXW!hs+K7ys8+_*ZxXy?59R5WMqX?B7rytnc=|be(5zKHzT7O3X+*}%&8LmX8v6V74gJW28|uk1H=oI8-`srOh>V*XeCGMK&WH6JzNqG4ZoX6; zZdjWe@=I>`9+H}yuM2LzQO|D+Zuob&B{#q7-{!&%>yn$_cu%rm1a7D&ZXTHZcj|-n z-AEwUdFG};pc#2KxWWIPXDE$aM>BquPu!Z#O4Ho5__wIdq3{2?7He+W&A#75&?g^V z`uwCmSaZ^8_TAflSAES*uNfILH^Vd|Yv9JOZ|FxJ+)z)BxfxbI`{ri2W@Oyl;4{yk zbv~@;Fnl!!b2EbCaKqZ%kY94c_nz0>jMNyn#sn~{2c_HaWzamSebsOp3D-Nuuy^UTfo&B#5@1~>SlH6uos25U6OkWbv26I7b! zX2NFVp17He)M5;y3Rk1l=2R3EH4nOOI>KZ*L9n@O9IF>^CnGqQ&Metknf^5BMg za?H)-^4T{xQ#7M5uD?&gXP#r}d|1z6%4!bgW-7(uhPAmNzvPDRS*^L5rr>5;^_;HY zhJWv0axu?(fP2R!@|`Z%*`T-!wqY5Lw?B(-|Js< zvsl5+;_A6X!42OdQgX9`zHbC>SeM+a*o@Tkvxgh%i95#ZmsB6D@3xY3oo8-VZbt5L zHn_oGsu{7gG+3j#jC|tOT&2=9H>)-y_sorJvF2tqaf3ej=+b9d^}(8x)pc+CYpAce zS+f}#GdF8BBWvjI*EjSd4{oR@$K0$fpM7()PBSuYZt$7saylQ@b6B^UgSlBxakyb^ zZpbgW;d_K@ZZ;^m*-$+}|=GK*_x!I-}xo2)%i#0dfiW~IFN0&aEst?wj zY^QtM-(G#q%?{1Tn7P@p8CgSrzrLX#d2mBLIp$_3`RtpUotu$ybA!)3H`DpBp2IHH z9L&wGio*?Sb3=Z~4WDhqe|)bJe?P_dD(zWtvzL1AU2wzqqLthntnW{Q8`dQ^he*%Q z9&V^7?ijP*M}4rq+o95Rp1C=!8M(*V;0Ax+X2gEdV2$Sf@`+pX@JiF%9MO#2GdHfq znwulV4f^DxOP>SO2Ww7_(!K2;t-j{wm}X?m+#K7Ctl{@4zrLX#d2mBLIp*d#`RtpU z%9L&v$io*?Sb3=Z~4WEfpb8~XR%_-`6YQYWPzgTi}uD+)c zZdjMxoY#!h^RtH=>WMqX>`zl4tnYTdbe(5zE@(#XaW=TYKfM`ohBR2Cd8T~g*1WLN zG&dJDBlpaWYq93$VsV2$`RLN;EcL;flS_1O`(+rL+R&CPwy$e6jgzZqEr zFMfSPKl0#)dUDLo1M=B7HxD)==v+`QC`+~aI; zga2eR;wfpcM)PU;#I5;qrD<+nX-4ju8`omZ&8y-Dee%(z&okyn#)HzW1@?BRxb;*K%#yL~KO z=b4*Nnvr{)4Q}w?ZAQE&4c2JBFQ2$IKdm&)&1cQXJ#*t)thxDI+@McBy7c)#eX!=_ z3*FoPm+EV7zG_Cs%+1%$$Qt_l^$q>VgB$9}F*o1HXW!g>%UO#*hvPHP4|P7Q=kQ%M z2Xphi;&8*-+>l>#!)Jcg-27N@^OJi1TyVqZF_hf2TYaC=05_~lZaS?Ebk!TN4p={nEc^jeX7oDFX9e{DwmCJokT@*xAfXGz?e!&I8)X4qEbp1E-?*4zx& z>U*Z>laDTa8tQ{JC&TOB_D4`(b2DNqGG=Z@YDL!2->-4#M;_czPmZ}6Sw8#bW|UTB z++T0-nP*ey!+H**R&y{nqbUwItj!JiB{zIlV$IDMd`5>p&tXjU9IG|-{bD}DqvU2H zeXa+CV_kAHaVt{K&mL~5C+--tA6tE}zS|_yb)LDIv=zC>+296$oL0oR(qN6|c=Cx` zbFxa)+)Unz+%q?>#hRNb#0~o7qf4Lh)dy=%rqsRdPo=)*X6jaC%-l@Vimai(U*FJ= zJh-8r9CI_ReD=-FbbJmH_t5)(_{?(xoe%3dOkd5x+{~aj+^{w`i1u9K*vtTQ7&)m2cYi<@2H|Ud(E`8=yAFMf9SogNSi29nFMO%?E zbF)}0vWEVCeM3L;;D&l~%+2ER**7;!v?Al?2A_G(rSoAuhb5~yn46^(ha1-BhWwHn zKAXAbW*KVr^=4W1T(01T&k8EJSxuiG1UIZpZdPwa>iOBj4fVtwWA@9d57u{EL%Pm0 zH*2;c_c$Be;IGh%SWz0R(OgMBacizsX_}k0TakO_#k^!Mu<`jH1W)RSXwHk8l4x!I@{88J$s^(yB zHdY*NSeqO2OK$i~*P5G63T`%4&&>*M_(2qR0p`ILbvxj{4&CQ;z$hf(|XP#T@ zd|1z6uWAnFW^cvehPAmNzvPC`!mhd5x8P2rwsV9m*Ky0`t~)z{pd(29(in-g1+HT3uE8~Tw4H`J43ZcdWVzPUNM z6&W`-_{{TAoe%3doKnrf+?=X7+^{w`|tw=pTd$^&VxMR%zEcLu~% z1vh;5Vad&1`aDFqVO?@_cdJj&&mL~5C+--tzeRnpzS}+0b)IK-Z!2<-*8y;Ye_Jc! zR%u|ZabKlztv*}s{#N83_d^38joYQcnsePDpSU$2s5B4OXY4)Lirh1MuEm}RxwkcIPZz8`qn^(etnnF}C2N1t=WfCp>yovqmDirnLVu!fJui_&1t zxn7b_+?sDxnr7`Et;jvI=31;-`)4b%C;H^0OP`n32W!^eR3EeURx2`Q*51}^{Td33 zK6$W4JvpA+JM!5#Ywxxq<7N$?XYh*7gY~`NtL|;q-d7yfSerHSOV;@8>zcI>3)VhT z&wmxH@ma1VYhUT}U15!N$=cVgNImlcYt$2Wj9LF%eXzdUH_~;UXZ9`6PQN$73H~Ro zh>xX#wZ?aqhFSZ*6}iX#(7;FIQ)#f~T%XA&Zp|MmO|$kRHN1bM&na*%)~x;1itLF# z`RLN;bM?WRwV&0;to_o8jG47xHCq_-YcKT4gEi{O@!Wos&%RmXVG%cL_&kFzbRMkl z-DvlHj<27GS!*f|Ypl&0`6X+7ZgkCByFK(hf=;{d{ornU=zEfUW^l>c$nCz*5r#F^ zC2OPLw})~CYt$3>nVEG@eXzdUsM2+wXEs_pa?fAa@P}p zk$UC@)~F}$7_(kjeXzdUveI>)XSQ5Ba?h;cFWQb+L>gFYEMIAuwH4Ztd)yD!@X=UI z8mu|j;_``GbHz&2tgY0J+%s#g#hSI1(NQn*(WTE4>Vq|FtEi7zTeTe-Gi$49w)FFB zHuTAZHR{Rn+*X&*zFAwN9U1r6HGH1Ik~$C8_g=HQw^>_Daadz**2phe<7ac!tgTb9 zwyt`vSFpxs?U$@=s?XntHP$6-o3$hL%nPhhPuwwPy}tTjeYYXfb)ILoc{_5?tl@9i zj@UpNSZi!iX_&Pw+mURz+BWUTp6HX0 zE`2suAFNs1R(;IccJ0WRS=(N-rJrA~p-&#HQBRKNwu5~3&DxIb$hcX<=NSyvd9c3s zPSw55+Rlo@8f&vge#sg?*P>=^*Mha()N}WOHGZZ;$=U(>IS;VLx@7IZcBGzpfi>!h zJI1W{P#>)Cc93+P=b0Vcj@&bA_qBlpajYq4hSh<0R8^vOq;KKrT<)~p?=K4$Hxc4W+~9j)2Y&#%|eClA)B zC&zO;Mn3yy?bvo?+^pgA4EED`u)g^W=gx zes)O7+By1pBCy7~WbNE`q@H?eWX!BxuG!L$S?~*e@?ed6ay+*y;4Ga7>w8~S-P^2P ztvIZ)Hf!XUtnu@lYSykTSi4R=uP<2RXVjFe-Jzdb18b~H*6wUa>X{cW5@2ND+ z+P&?_J+tOotXaFS9oZ9o^3kQwt?Gj{Yxk>oxSrgEi{O@!TGg z&%RlExE&ccYxq2a+jJhR@BK)1Z?pEO;;_crtdU=`#?N%CS$n)-?FsdKvS5v$g;cWk zl72oCtg$Xxd$}E{XI@~9dg6{T>!;KQ>$|-oUFUgbueKxi%o_eP?TDwPfwjh8Dh;#t z*LLI{_k%TjG@g|PYtHqYeB##pTcv5%UTa70nKjp9&D!7Fkv-8TA6@!9uRd6__PY9* zwKv+4F|+m$&6a+Cy@oz{utq&Op4&g=vv1bkY)8h;8a~h91)T@$d%soP+pN8nw=l)~ zTY4|owOF(ET|2TT`sAZapO4iCYu3J3AG7uYIqjjJPy3^0OFzF}L!UfYBbOY{?I-!{ zo3)?Yk#Vzz&olT$=fV2kzf|`&YriTEYpl&0`6X+=iM8_cB>5p?{`X#uPTzaE&CbyG za{1YAC2PZX`hMOUtg$Xx8-X84))~qbtWi(g3^D7L`e1#x5vA)q&upYl@6cBo zSZj=2X_&Q9`1y04$os(>J{nzVu;yGn`NXX`YNct`M(afGnKjp9&D!Ydi9Y%0(q|a; z!J4%(G&}oas;}7_s}mXX*SWDfkv05&%53;WKk{IYdU8C+apbda_Qvf*#?2l+&tzDg z59>LMSIxofjjuTDu{L|;m+bL#%=xcmZ^DATiPUrAf<1nwV9DMz`Z3!uo#&ZN--+Bad-#)eA|{mv)*3TZ8fI_CPUIfxAoSo)C%d(@NTInF7ceX}=LCo*pK@OdUv>3mqvVeV=UW^W$FVUM-hBfn&C-hSAd zuV8O}^<1D}kDp~(vbUsuz9sCjF4x8dz&ATWOfR`AXC5tX{$dqn@~9%zi!f!TN5SN!NLv*^o};p4r3SpcApaG_cm#ywWgxTXZ7#xF77{qp_hh zSaYt8f${>>bgG z?1?`4=+b9j^}(98BQ-nwN2#ybJGv7YGkeE$B5UZ+Z1_b#@?ej8ay-Xl<+E@0j_X9m z%^p6_WIvq`>p2`>&B5%Qpg8QYHhbik?48&TdnXm_ovfax6zuUcqf7SA)z6WJJ=P_A z=XE0W%n$5QPuwwPf2#UmeYf+a>paiwf==X~*~34*6LFd}u-3S+(lC1$bt3n;AMD|y zafUQlbFMSx6SwBYm8RLdq!YPk_FRiKdzW@1d!kQ1y7W0qeXwTjGR@BZ^>^)Vm_q2LGQ?U0+#onu($QbOg zF4_A_CsNP+z#jF)9b@*-st?w8`>S-F=b8Pj6S-&h@SpEQJSPpTHD0SU%--KSk$c<^ z_VCeoK^m+%*NgIrTl4iw)9k&`iQF@LuEm>yqE9}$^m$2rux9O_nw|YO)z|F3 z)rpLmy|+7&HT3uEJ^GOcd(@NTIld#GeY5v&Co*pK@OdUL>wH+x;k{}OX77E)VUM-h zBfn(tgMQfiuwd^a_54@C-j@}7Uv(m5u*bS&@9R#ap80`2>WMqX?EkGkSl{g%={nCd z`<7?f>3cl`H~61)B0iP|)*9bc8fNeNPUIfh_I~R`*1#VBU-FB7Yx{)#1V_mX0YBy5P=Yu`!i95#ZhfyD_?>3rro#&a2-i_Sz*FF5< zx)H-l18a>jDh;zYW;b$=`@tSQ8pBJ2HRl>ZK5=V~RcV^NvAdCb{<`N{tl1l<8`%?m z^3kQwi0XqiYvXEm_Qz9Svp0S>GG_KB=tkDi->>)RM;`1^Pmbp}p?vnu-bCHVxY@(! znT(|KVLgY5t2vmxNfd`Y)@G0VlD$d$VQ;d6y~)*cih{ivD)wgVM#f-|b;;gL-AFz2 z1AEjHcZ}IjsXkcWZD#2@&oi5)8@Xrp@TcxZOeGDhHD;|e%-(F>$UW`{d-!NfBMsJ^ zYg+lltvP$8Y4+ymM(&wC*J91yoZZNt=#!5weWp_%tXZ2&v$H?9`kK9Ycuw7+f2THY zH?oHQe!WLO@?ei#ay-ZRp3h?&B5#~s5tDgHhbik>@Cy} zdkYurEux-_7VIrsvA0||G6s9BOZJxUM(UX#*rT4fW6XXr^}+gXD@fOQp4p1s$UU=% zzeG1;acN+!u~MaB_Ezpj?r}fZ!$)IDX|U#8OUWm0%~dK*v$twDa?k9!7Hjra>qhoO zpL}%bv$XnP&D!dko&7b`*X*s?jg0y0-df$r8v6V79{tFJJ?hEv9M_i5zS&!+8yPoy z_&k$kbUv)-ux>R6v$vk&u*cf$kzca6zSt{2e|3X`y$#iKqk_H7EB3bNM#f-|b;;hA z-AFz21AEjHcZ}H&QXj1Ewv}|9=b3HYjodSP_=CF<8%qOgjcqCov$t(Ga*zAL9zGhI zNP{)!+EhMqYi?I*n!WA2k$Yy(wOF&aLpQP~`sAZapUu<?NzY1w|eeVuy;ts-l5&d80@hw**lC`7M#Hz^~B8-v)@;Ju)f>j(siC^c0@OF z&+Osv-;LN$8dz%_S!tNPqq>oM+ztX3f9_&$1j^}uieD=-W$=%4f z*~90V9IW$UJ%>}OIhegu6^A|6W{>=mz0>+(@AQJbGt~3Ug1rkX_Acs1#$b zX|U#8=gB8-%_}NRvv*}Ta?k9!7HjsdQcv{BN0&b5s}I(!U9H*Kzeat{-nHGxnAy7y zuDX4H&hYC!`jH2F)Mt)9+u(Zn?3=wCx{-0ShtD&)K<(siC^_CPms z&+Osf*^RhE8dz&QSZSEOhq{q_+z^;$qjG4VByOA|~z4Plm`jH2F)RW^mJ|&-hv-fm2 zGH&+pc_w%3d|1!nnQ9JZ?^(rRkG0t&zhv*Ze%O1yVDAO>e6e8fwTivJcOzr4$GT+i z^=_n|`GGy^i95#ZUs4~e@Aigto#&bTqZ_$r_V8cnM!YNytTp~wX_&n?yODd`5BBiU zcvTv#IoDt06SwADm8RKyyBoP@_FRiKd+&53d!kQ1y7c+0`e4o4yPBQ-_te+yz2A+D znY|CXku~&ZHvFO=d9X)4IiBN(^4T|gA9W++W)Gid@;99i>pA?ZnuFQwf0V2^s@jxqbs)d%al{U}}Md1gPs z3-4R$vvlyk>_&Vc4XibOt~AWvFWtyJ?gxALXnZ9N)|~5W`NXaHYo%%Se(Of=nLXEH z%^rUlNl*02N0&a|s1MexHF|xsv)}CX&Drd=dXX`+*X~8u(BH55=tmyxQBRKN*pbh^ z+3WTq<7N+^XY#GihxHtKI**@^*&9Z2*kf(>$S>I&mcN*-e$L2ny`k?d58vy1Z+V2? z(D#6~amdETx#$b`l;%+%tQw#hSeddyzfSCm&t< zjHW(Vvo?`tXMbY#HG7lvB4cK6(q3c@{r!56e&oR(_2hVtlgVe_>`mT_jGH}tp2_Gs zAJ%i2qMC!*n^JMuV{P`xFWH-_ANHm$*qcT@r!ClTKd?tV zamSebbn1ik-R6+4^E|UTdy#u)4}XSU#Prg@T4SzC!|cu7i`?UWu!oPvjM8Auxn`12 z+?w-Lnr3faW((8${tVY*&E9;y$e!qvk1l;?Rv)Zcn_siDzkvFhy#;%bF|)T2&q)3K zdXIkO!5;PGc#aFpXW#5C(u<6nJ$#?VBp4oFP*6gj_i|mO$`RLMT z1@*z2wRJQ*`|GN&*;}s{88dt9_abZP&usWbKk{IYdU8C+4dkK>RL#Nc4N@HTSerfaOZGPIhrPiCdz+}|rUiT3RP1fri;Te@>yo|gdXakO2ll8Z z?ijP*OntDv+xF6Ro@cg0FLKZ9;cwoH7$OauN zM?dmlk9u-E$GzmUZ}#@4KfmwlwKP7@WNV!d>pARG&B5&Lt2pejHhbik?CsYNd;1sc z9iW~E7VI5av3FE2G6s9BOZJZLMe3O!*rT4fW6b^_^}+gX$4J+Cp4qXz$UU=%e@HLl zU}<2jaa^Tg_KxpG?r}fZ!$;##X|U#8hsh^y%@Zn3vv*=Ia?k9!7HjrS>P7ZMpL}%b zbGZ6o&DzPDo&8hP*X*6zi;S7Q(|VCL^!MvM`jH2F)RW^mo-Ut#vv)=>GH&+pc_v5b zd|1!n%xVs1?<~b(kG0t&zhv+1e%L#wVDDV@Jg;EyvWmUSIXgOyR<29-uINSTnIG6A zm$+lh{(SYp`fgWB*Lj}VRlUePvxk3SFX94eV6Aa=rD682;TiNI?+1JMXj~)>)|~5N z`NXYxZKY}UuIokanLXEH&EECui9Y%0(&rNO!J4%jG&}n@s;}9*sTUbDdpGwYYv}LS wd-Nj@_NXVvbG$`9`)2RfUS! { Add(ArithDetails, Arg3

), Setp(SetpData, Arg4Setp

), SetpBool(SetpBoolData, Arg5

), - Not(NotType, Arg2

), + Not(BooleanType, Arg2

), Bra(BraData, Arg1

), Cvt(CvtDetails, Arg2

), Cvta(CvtaDetails, Arg2

), @@ -569,12 +569,12 @@ pub enum Instruction { Call(CallInst

), Abs(AbsDetails, Arg2

), Mad(MulDetails, Arg4

), - Or(OrAndType, Arg3

), + Or(BooleanType, Arg3

), Sub(ArithDetails, Arg3

), Min(MinMaxDetails, Arg3

), Max(MinMaxDetails, Arg3

), Rcp(RcpDetails, Arg2

), - And(OrAndType, Arg3

), + And(BooleanType, Arg3

), Selp(SelpType, Arg4

), Bar(BarDetails, Arg1Bar

), Atom(AtomDetails, Arg3

), @@ -590,6 +590,9 @@ pub enum Instruction { Clz { typ: BitType, arg: Arg2

}, Brev { typ: BitType, arg: Arg2

}, Popc { typ: BitType, arg: Arg2

}, + Xor { typ: BooleanType, arg: Arg3

}, + Bfe { typ: IntType, arg: Arg4

}, + Rem { typ: IntType, arg: Arg3

}, } #[derive(Copy, Clone)] @@ -896,14 +899,6 @@ pub struct SetpBoolData { pub bool_op: SetpBoolPostOp, } -#[derive(PartialEq, Eq, Copy, Clone)] -pub enum NotType { - Pred, - B16, - B32, - B64, -} - pub struct BraData { pub uniform: bool, } @@ -1058,7 +1053,7 @@ pub struct RetData { pub uniform: bool, } -sub_enum!(OrAndType { +sub_enum!(BooleanType { Pred, B16, B32, diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index cd1c642..6c231b2 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -142,6 +142,7 @@ match { "atom", "bar", "barrier", + "bfe", "bra", "brev", "call", @@ -166,6 +167,7 @@ match { "or", "popc", "rcp", + "rem", "ret", "rsqrt", "selp", @@ -179,6 +181,7 @@ match { "sub", "texmode_independent", "texmode_unified", + "xor", } else { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID, @@ -192,6 +195,7 @@ ExtendedID : &'input str = { "atom", "bar", "barrier", + "bfe", "bra", "brev", "call", @@ -216,6 +220,7 @@ ExtendedID : &'input str = { "or", "popc", "rcp", + "rem", "ret", "rsqrt", "selp", @@ -229,6 +234,7 @@ ExtendedID : &'input str = { "sub", "texmode_independent", "texmode_unified", + "xor", ID } @@ -708,6 +714,9 @@ Instruction: ast::Instruction> = { InstClz, InstBrev, InstPopc, + InstXor, + InstRem, + InstBfe, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld @@ -874,6 +883,13 @@ IntType : ast::IntType = { ".s64" => ast::IntType::S64, }; +IntType3264: ast::IntType = { + ".u32" => ast::IntType::U32, + ".u64" => ast::IntType::U64, + ".s32" => ast::IntType::S32, + ".s64" => ast::IntType::S64, +} + UIntType: ast::UIntType = { ".u16" => ast::UIntType::U16, ".u32" => ast::UIntType::U32, @@ -979,14 +995,14 @@ SetpTypeNoF32: ast::ScalarType = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not InstNot: ast::Instruction> = { - "not" => ast::Instruction::Not(t, a) + "not" => ast::Instruction::Not(t, a) }; -NotType: ast::NotType = { - ".pred" => ast::NotType::Pred, - ".b16" => ast::NotType::B16, - ".b32" => ast::NotType::B32, - ".b64" => ast::NotType::B64, +BooleanType: ast::BooleanType = { + ".pred" => ast::BooleanType::Pred, + ".b16" => ast::BooleanType::B16, + ".b32" => ast::BooleanType::B32, + ".b64" => ast::BooleanType::B64, }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at @@ -1294,19 +1310,12 @@ SignedIntType: ast::ScalarType = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-or InstOr: ast::Instruction> = { - "or" => ast::Instruction::Or(d, a), + "or" => ast::Instruction::Or(d, a), }; -OrAndType: ast::OrAndType = { - ".pred" => ast::OrAndType::Pred, - ".b16" => ast::OrAndType::B16, - ".b32" => ast::OrAndType::B32, - ".b64" => ast::OrAndType::B64, -} - // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-and InstAnd: ast::Instruction> = { - "and" => ast::Instruction::And(d, a), + "and" => ast::Instruction::And(d, a), }; // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp @@ -1447,7 +1456,7 @@ InstAtom: ast::Instruction> = { }; ast::Instruction::Atom(details,a) }, - "atom" => { + "atom" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), @@ -1456,7 +1465,7 @@ InstAtom: ast::Instruction> = { }; ast::Instruction::Atom(details,a) }, - "atom" => { + "atom" => { let details = ast::AtomDetails { semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed), scope: scope.unwrap_or(ast::MemScope::Gpu), @@ -1515,12 +1524,12 @@ BitType: ast::BitType = { ".b64" => ast::BitType::B64, } -AtomUIntType: ast::UIntType = { +UIntType3264: ast::UIntType = { ".u32" => ast::UIntType::U32, ".u64" => ast::UIntType::U64, } -AtomSIntType: ast::SIntType = { +SIntType3264: ast::SIntType = { ".s32" => ast::SIntType::S32, ".s64" => ast::SIntType::S64, } @@ -1664,6 +1673,22 @@ InstPopc: ast::Instruction> = { "popc" => ast::Instruction::Popc{ <> } } +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-xor +InstXor: ast::Instruction> = { + "xor" => ast::Instruction::Xor{ <> } +} + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfe +InstBfe: ast::Instruction> = { + "bfe" => ast::Instruction::Bfe{ <> } +} + +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem +InstRem: ast::Instruction> = { + "rem" => ast::Instruction::Rem{ <> } +} + + NegTypeFtz: ast::ScalarType = { ".f16" => ast::ScalarType::F16, ".f16x2" => ast::ScalarType::F16x2, diff --git a/ptx/src/test/spirv_run/bfe.ptx b/ptx/src/test/spirv_run/bfe.ptx new file mode 100644 index 0000000..60ee8a6 --- /dev/null +++ b/ptx/src/test/spirv_run/bfe.ptx @@ -0,0 +1,23 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry bfe( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .u32 temp<3>; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.u32 temp0, [in_addr]; + ld.u32 temp1, [in_addr+4]; + ld.u32 temp2, [in_addr+8]; + bfe.u32 temp0, temp0, temp1, temp2; + st.u32 [out_addr], temp0; + ret; +} diff --git a/ptx/src/test/spirv_run/bfe.spvtxt b/ptx/src/test/spirv_run/bfe.spvtxt new file mode 100644 index 0000000..edcf138 --- /dev/null +++ b/ptx/src/test/spirv_run/bfe.spvtxt @@ -0,0 +1,70 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %40 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "bfe" + OpDecorate %34 LinkageAttributes "__notcuda_ptx_impl__bfe_u32" Import + %void = OpTypeVoid + %uint = OpTypeInt 32 0 + %43 = OpTypeFunction %uint %uint %uint %uint + %ulong = OpTypeInt 64 0 + %45 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Generic_uint = OpTypePointer Generic %uint + %ulong_4 = OpConstant %ulong 4 + %ulong_8 = OpConstant %ulong 8 + %34 = OpFunction %uint None %43 + %36 = OpFunctionParameter %uint + %37 = OpFunctionParameter %uint + %38 = OpFunctionParameter %uint + OpFunctionEnd + %1 = OpFunction %void None %45 + %9 = OpFunctionParameter %ulong + %10 = OpFunctionParameter %ulong + %33 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + %8 = OpVariable %_ptr_Function_uint Function + OpStore %2 %9 + OpStore %3 %10 + %11 = OpLoad %ulong %2 + OpStore %4 %11 + %12 = OpLoad %ulong %3 + OpStore %5 %12 + %14 = OpLoad %ulong %4 + %29 = OpConvertUToPtr %_ptr_Generic_uint %14 + %13 = OpLoad %uint %29 + OpStore %6 %13 + %16 = OpLoad %ulong %4 + %26 = OpIAdd %ulong %16 %ulong_4 + %30 = OpConvertUToPtr %_ptr_Generic_uint %26 + %15 = OpLoad %uint %30 + OpStore %7 %15 + %18 = OpLoad %ulong %4 + %28 = OpIAdd %ulong %18 %ulong_8 + %31 = OpConvertUToPtr %_ptr_Generic_uint %28 + %17 = OpLoad %uint %31 + OpStore %8 %17 + %20 = OpLoad %uint %6 + %21 = OpLoad %uint %7 + %22 = OpLoad %uint %8 + %19 = OpFunctionCall %uint %34 %20 %21 %22 + OpStore %6 %19 + %23 = OpLoad %ulong %5 + %24 = OpLoad %uint %6 + %32 = OpConvertUToPtr %_ptr_Generic_uint %23 + OpStore %32 %24 + OpReturn + OpFunctionEnd diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index a7ef75b..5bbe45a 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -116,6 +116,20 @@ test_ptx!( [0b11000111_01011100_10101110_11111011u32], [0b11011111_01110101_00111010_11100011u32] ); +test_ptx!( + xor, + [ + 0b01010010_00011010_01000000_00001101u32, + 0b11100110_10011011_00001100_00100011u32 + ], + [0b10110100100000010100110000101110u32] +); +test_ptx!(rem, [21692i32, 13i32], [8i32]); +test_ptx!( + bfe, + [0b11111000_11000001_00100010_10100000u32, 16u32, 8u32], + [0b11000001u32] +); struct DisplayError { err: T, diff --git a/ptx/src/test/spirv_run/rem.ptx b/ptx/src/test/spirv_run/rem.ptx new file mode 100644 index 0000000..2ac482d --- /dev/null +++ b/ptx/src/test/spirv_run/rem.ptx @@ -0,0 +1,23 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry rem( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .s32 temp1; + .reg .s32 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.s32 temp1, [in_addr]; + ld.s32 temp2, [in_addr+4]; + rem.s32 temp1, temp1, temp2; + st.s32 [out_addr], temp1; + ret; +} diff --git a/ptx/src/test/spirv_run/rem.spvtxt b/ptx/src/test/spirv_run/rem.spvtxt new file mode 100644 index 0000000..72d0965 --- /dev/null +++ b/ptx/src/test/spirv_run/rem.spvtxt @@ -0,0 +1,55 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %28 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "rem" + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %31 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Generic_uint = OpTypePointer Generic %uint + %ulong_4 = OpConstant %ulong 4 + %1 = OpFunction %void None %31 + %8 = OpFunctionParameter %ulong + %9 = OpFunctionParameter %ulong + %26 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + OpStore %2 %8 + OpStore %3 %9 + %10 = OpLoad %ulong %2 + OpStore %4 %10 + %11 = OpLoad %ulong %3 + OpStore %5 %11 + %13 = OpLoad %ulong %4 + %23 = OpConvertUToPtr %_ptr_Generic_uint %13 + %12 = OpLoad %uint %23 + OpStore %6 %12 + %15 = OpLoad %ulong %4 + %22 = OpIAdd %ulong %15 %ulong_4 + %24 = OpConvertUToPtr %_ptr_Generic_uint %22 + %14 = OpLoad %uint %24 + OpStore %7 %14 + %17 = OpLoad %uint %6 + %18 = OpLoad %uint %7 + %16 = OpSMod %uint %17 %18 + OpStore %6 %16 + %19 = OpLoad %ulong %5 + %20 = OpLoad %uint %6 + %25 = OpConvertUToPtr %_ptr_Generic_uint %19 + OpStore %25 %20 + OpReturn + OpFunctionEnd diff --git a/ptx/src/test/spirv_run/xor.ptx b/ptx/src/test/spirv_run/xor.ptx new file mode 100644 index 0000000..a28b321 --- /dev/null +++ b/ptx/src/test/spirv_run/xor.ptx @@ -0,0 +1,23 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry xor( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .b32 temp1; + .reg .b32 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.b32 temp1, [in_addr]; + ld.b32 temp2, [in_addr+4]; + xor.b32 temp1, temp1, temp2; + st.b32 [out_addr], temp1; + ret; +} diff --git a/ptx/src/test/spirv_run/xor.spvtxt b/ptx/src/test/spirv_run/xor.spvtxt new file mode 100644 index 0000000..ee09898 --- /dev/null +++ b/ptx/src/test/spirv_run/xor.spvtxt @@ -0,0 +1,55 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %28 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "xor" + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %31 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint +%_ptr_Generic_uint = OpTypePointer Generic %uint + %ulong_4 = OpConstant %ulong 4 + %1 = OpFunction %void None %31 + %8 = OpFunctionParameter %ulong + %9 = OpFunctionParameter %ulong + %26 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + %7 = OpVariable %_ptr_Function_uint Function + OpStore %2 %8 + OpStore %3 %9 + %10 = OpLoad %ulong %2 + OpStore %4 %10 + %11 = OpLoad %ulong %3 + OpStore %5 %11 + %13 = OpLoad %ulong %4 + %23 = OpConvertUToPtr %_ptr_Generic_uint %13 + %12 = OpLoad %uint %23 + OpStore %6 %12 + %15 = OpLoad %ulong %4 + %22 = OpIAdd %ulong %15 %ulong_4 + %24 = OpConvertUToPtr %_ptr_Generic_uint %22 + %14 = OpLoad %uint %24 + OpStore %7 %14 + %17 = OpLoad %uint %6 + %18 = OpLoad %uint %7 + %16 = OpBitwiseXor %uint %17 %18 + OpStore %6 %16 + %19 = OpLoad %ulong %5 + %20 = OpLoad %uint %6 + %25 = OpConvertUToPtr %_ptr_Generic_uint %19 + OpStore %25 %20 + OpReturn + OpFunctionEnd diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index 23a63be..365d1e8 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -1289,6 +1289,9 @@ fn extract_globals<'input, 'b>( .. }, ) => global.push(var), + Statement::Instruction(ast::Instruction::Bfe { typ, arg }) => { + local.push(to_ptx_impl_bfe_call(id_def, ptx_impl_imports, typ, arg)); + } Statement::Instruction(ast::Instruction::Atom( d @ @@ -1591,6 +1594,24 @@ fn convert_to_typed_statements( arg: arg.cast(), })) } + ast::Instruction::Xor { typ, arg } => { + result.push(Statement::Instruction(ast::Instruction::Xor { + typ, + arg: arg.cast(), + })) + } + ast::Instruction::Bfe { typ, arg } => { + result.push(Statement::Instruction(ast::Instruction::Bfe { + typ, + arg: arg.cast(), + })) + } + ast::Instruction::Rem { typ, arg } => { + result.push(Statement::Instruction(ast::Instruction::Rem { + typ, + arg: arg.cast(), + })) + } }, Statement::Label(i) => result.push(Statement::Label(i)), Statement::Variable(v) => result.push(Statement::Variable(v)), @@ -1610,6 +1631,7 @@ fn convert_to_typed_statements( Ok(result) } +//TODO: share common code between this and to_ptx_impl_bfe_call fn to_ptx_impl_atomic_call( id_defs: &mut NumericIdResolver, ptx_impl_imports: &mut HashMap, @@ -1705,6 +1727,100 @@ fn to_ptx_impl_atomic_call( }) } +fn to_ptx_impl_bfe_call( + id_defs: &mut NumericIdResolver, + ptx_impl_imports: &mut HashMap, + typ: ast::IntType, + arg: ast::Arg4, +) -> ExpandedStatement { + let prefix = "__notcuda_ptx_impl__"; + let suffix = match typ { + ast::IntType::U32 => "bfe_u32", + ast::IntType::U64 => "bfe_u64", + ast::IntType::S32 => "bfe_s32", + ast::IntType::S64 => "bfe_s64", + _ => unreachable!(), + }; + let fn_name = format!("{}{}", prefix, suffix); + let fn_id = match ptx_impl_imports.entry(fn_name) { + hash_map::Entry::Vacant(entry) => { + let fn_id = id_defs.new_id(None); + let func_decl = ast::MethodDecl::Func::( + vec![ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + name: id_defs.new_id(None), + array_init: Vec::new(), + }], + fn_id, + vec![ + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + name: id_defs.new_id(None), + array_init: Vec::new(), + }, + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar( + ast::ScalarType::U32, + )), + name: id_defs.new_id(None), + array_init: Vec::new(), + }, + ast::FnArgument { + align: None, + v_type: ast::FnArgumentType::Reg(ast::VariableRegType::Scalar( + ast::ScalarType::U32, + )), + name: id_defs.new_id(None), + array_init: Vec::new(), + }, + ], + ); + let spirv_decl = SpirvMethodDecl::new(&func_decl); + let func = Function { + func_decl, + globals: Vec::new(), + body: None, + import_as: Some(entry.key().clone()), + spirv_decl, + }; + entry.insert(Directive::Method(func)); + fn_id + } + hash_map::Entry::Occupied(entry) => match entry.get() { + Directive::Method(Function { + func_decl: ast::MethodDecl::Func(_, name, _), + .. + }) => *name, + _ => unreachable!(), + }, + }; + Statement::Call(ResolvedCall { + uniform: false, + func: fn_id, + ret_params: vec![( + arg.dst, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + )], + param_list: vec![ + ( + arg.src1, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(typ.into())), + ), + ( + arg.src2, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(ast::ScalarType::U32)), + ), + ( + arg.src3, + ast::FnArgumentType::Reg(ast::VariableRegType::Scalar(ast::ScalarType::U32)), + ), + ], + }) +} + fn to_resolved_fn_args( params: Vec, params_decl: &[ast::FnArgumentType], @@ -2803,7 +2919,7 @@ fn emit_function_body_ops( let result_id = Some(a.dst); let operand = a.src; match t { - ast::NotType::Pred => { + ast::BooleanType::Pred => { // HACK ALERT // Temporary workaround until IGC gets its shit together // Currently IGC carries two copies of SPIRV-LLVM translator @@ -2854,7 +2970,7 @@ fn emit_function_body_ops( }, ast::Instruction::Or(t, a) => { let result_type = map.get_or_add_scalar(builder, ast::ScalarType::from(*t)); - if *t == ast::OrAndType::Pred { + if *t == ast::BooleanType::Pred { builder.logical_or(result_type, Some(a.dst), a.src1, a.src2)?; } else { builder.bitwise_or(result_type, Some(a.dst), a.src1, a.src2)?; @@ -2882,7 +2998,7 @@ fn emit_function_body_ops( } ast::Instruction::And(t, a) => { let result_type = map.get_or_add_scalar(builder, ast::ScalarType::from(*t)); - if *t == ast::OrAndType::Pred { + if *t == ast::BooleanType::Pred { builder.logical_and(result_type, Some(a.dst), a.src1, a.src2)?; } else { builder.bitwise_and(result_type, Some(a.dst), a.src1, a.src2)?; @@ -3033,6 +3149,39 @@ fn emit_function_body_ops( let result_type = map.get_or_add_scalar(builder, (*typ).into()); builder.bit_count(result_type, Some(arg.dst), arg.src)?; } + ast::Instruction::Xor { typ, arg } => { + let builder_fn = match typ { + ast::BooleanType::Pred => emit_logical_xor_spirv, + _ => dr::Builder::bitwise_xor, + }; + let result_type = map.get_or_add_scalar(builder, (*typ).into()); + builder_fn(builder, result_type, Some(arg.dst), arg.src1, arg.src2)?; + } + ast::Instruction::Bfe { typ, arg } => { + let builder_fn = if typ.is_signed() { + dr::Builder::bit_field_s_extract + } else { + dr::Builder::bit_field_u_extract + }; + let result_type = map.get_or_add_scalar(builder, (*typ).into()); + builder_fn( + builder, + result_type, + Some(arg.dst), + arg.src1, + arg.src2, + arg.src3, + )?; + } + ast::Instruction::Rem { typ, arg } => { + let builder_fn = if typ.is_signed() { + dr::Builder::s_mod + } else { + dr::Builder::u_mod + }; + let result_type = map.get_or_add_scalar(builder, (*typ).into()); + builder_fn(builder, result_type, Some(arg.dst), arg.src1, arg.src2)?; + } }, Statement::LoadVar(arg, typ) => { let type_id = map.get_or_add(builder, SpirvType::from(typ.clone())); @@ -3079,6 +3228,20 @@ fn emit_function_body_ops( Ok(()) } +// TODO: check what kind of assembly do we emit +fn emit_logical_xor_spirv( + builder: &mut dr::Builder, + result_type: spirv::Word, + result_id: Option, + op1: spirv::Word, + op2: spirv::Word, +) -> Result { + let temp_or = builder.logical_or(result_type, None, op1, op2)?; + let temp_and = builder.logical_and(result_type, None, op1, op2)?; + let temp_neg = builder.logical_not(result_type, None, temp_and)?; + builder.logical_and(result_type, result_id, temp_or, temp_neg) +} + fn emit_sqrt( builder: &mut dr::Builder, map: &mut TypeWordMap, @@ -5039,6 +5202,27 @@ impl ast::Instruction { arg: arg.map_different_types(visitor, &dst_type, &src_type)?, } } + ast::Instruction::Xor { typ, arg } => { + let full_type = ast::Type::Scalar(typ.into()); + ast::Instruction::Xor { + typ, + arg: arg.map_non_shift(visitor, &full_type, false)?, + } + } + ast::Instruction::Bfe { typ, arg } => { + let full_type = ast::Type::Scalar(typ.into()); + ast::Instruction::Bfe { + typ, + arg: arg.map_bfe(visitor, &full_type)?, + } + } + ast::Instruction::Rem { typ, arg } => { + let full_type = ast::Type::Scalar(typ.into()); + ast::Instruction::Rem { + typ, + arg: arg.map_non_shift(visitor, &full_type, false)?, + } + } }) } } @@ -5351,6 +5535,9 @@ impl ast::Instruction { ast::Instruction::Clz { .. } => None, ast::Instruction::Brev { .. } => None, ast::Instruction::Popc { .. } => None, + ast::Instruction::Xor { .. } => None, + ast::Instruction::Bfe { .. } => None, + ast::Instruction::Rem { .. } => None, ast::Instruction::Sub(ast::ArithDetails::Float(float_control), _) | ast::Instruction::Add(ast::ArithDetails::Float(float_control), _) | ast::Instruction::Mul(ast::MulDetails::Float(float_control), _) @@ -6192,6 +6379,52 @@ impl ast::Arg4 { src3, }) } + + fn map_bfe>( + self, + visitor: &mut V, + typ: &ast::Type, + ) -> Result, TranslateError> { + let dst = visitor.id( + ArgumentDescriptor { + op: self.dst, + is_dst: true, + sema: ArgumentSemantics::Default, + }, + Some(typ), + )?; + let src1 = visitor.operand( + ArgumentDescriptor { + op: self.src1, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + typ, + )?; + let u32_type = ast::Type::Scalar(ast::ScalarType::U32); + let src2 = visitor.operand( + ArgumentDescriptor { + op: self.src2, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + &u32_type, + )?; + let src3 = visitor.operand( + ArgumentDescriptor { + op: self.src3, + is_dst: false, + sema: ArgumentSemantics::Default, + }, + &u32_type, + )?; + Ok(ast::Arg4 { + dst, + src1, + src2, + src3, + }) + } } impl ast::Arg4Setp { @@ -6437,13 +6670,13 @@ impl ast::ScalarType { } } -impl ast::NotType { +impl ast::BooleanType { fn to_type(self) -> ast::Type { match self { - ast::NotType::Pred => ast::Type::Scalar(ast::ScalarType::Pred), - ast::NotType::B16 => ast::Type::Scalar(ast::ScalarType::B16), - ast::NotType::B32 => ast::Type::Scalar(ast::ScalarType::B32), - ast::NotType::B64 => ast::Type::Scalar(ast::ScalarType::B64), + ast::BooleanType::Pred => ast::Type::Scalar(ast::ScalarType::Pred), + ast::BooleanType::B16 => ast::Type::Scalar(ast::ScalarType::B16), + ast::BooleanType::B32 => ast::Type::Scalar(ast::ScalarType::B32), + ast::BooleanType::B64 => ast::Type::Scalar(ast::ScalarType::B64), } } }