mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-07-18 17:56:22 +03:00
Document weird implicit PTX coercions
This commit is contained in:
21
doc/NOTES.md
21
doc/NOTES.md
@ -1,3 +1,5 @@
|
|||||||
|
Parser generators in Rust:
|
||||||
|
--------------------------
|
||||||
I'm convinced nobody actually uses parser generators in Rust:
|
I'm convinced nobody actually uses parser generators in Rust:
|
||||||
* pomelo can't generate lexer (understandable, as it is a port of lemon and lemon can't do this either)
|
* pomelo can't generate lexer (understandable, as it is a port of lemon and lemon can't do this either)
|
||||||
* pest can't do parse actions, you have to convert your parse tree to ast manually
|
* pest can't do parse actions, you have to convert your parse tree to ast manually
|
||||||
@ -9,15 +11,17 @@ I'm convinced nobody actually uses parser generators in Rust:
|
|||||||
* no library supports island grammars
|
* no library supports island grammars
|
||||||
|
|
||||||
What to emit?
|
What to emit?
|
||||||
|
-------------
|
||||||
* SPIR-V
|
* SPIR-V
|
||||||
* Better library support, easier to emit
|
* Better library support, easier to emit
|
||||||
* Can be optimized by IGC
|
* Can be optimized by IGC
|
||||||
* Can't do some things (not sure what exactly yet)
|
* Can't do some things (not sure what exactly yet)
|
||||||
* But we can work around things with inline VISA
|
* But we can work around with inline VISA
|
||||||
* VISA
|
* VISA
|
||||||
* Quicker compilation
|
* Quicker compilation
|
||||||
|
|
||||||
A64 vs BTS
|
A64 vs BTS
|
||||||
|
----------
|
||||||
* How to force A64: -cl-intel-greater-than-4GB-buffer-required
|
* How to force A64: -cl-intel-greater-than-4GB-buffer-required
|
||||||
* PTX made a baffling design choice: global pointers are represented as untyped 64bit integers
|
* PTX made a baffling design choice: global pointers are represented as untyped 64bit integers
|
||||||
* Consequently, there's no 100% certain way to know which argument is a surface and which is a scalar
|
* Consequently, there's no 100% certain way to know which argument is a surface and which is a scalar
|
||||||
@ -30,3 +34,18 @@ A64 vs BTS
|
|||||||
* Potential solution: compile only during the dispatch, when type of arguments is known?
|
* Potential solution: compile only during the dispatch, when type of arguments is known?
|
||||||
* Can't do, the set of arguments passed to cuLaunchKernel is untyped
|
* Can't do, the set of arguments passed to cuLaunchKernel is untyped
|
||||||
* Solution: treat all arguments as untyped integers and say goodbye to BTS access
|
* Solution: treat all arguments as untyped integers and say goodbye to BTS access
|
||||||
|
|
||||||
|
Implicit conversions
|
||||||
|
--------------------
|
||||||
|
* PTX support for implicit conversions is completely degenerate, docs say:
|
||||||
|
_For convenience, ld, st, and cvt instructions permit source and destination data operands to be wider than the instruction-type size, so that narrow values may be loaded, stored, and converted using regular-width registers. For example, 8-bit or 16-bit values may be held directly in 32-bit or 64-bit registers when being loaded, stored, or converted to other types and sizes_
|
||||||
|
Which is sensible, but completely untrue. In reality ptxas compiles silly code like this:
|
||||||
|
```
|
||||||
|
param.f32 param_1
|
||||||
|
...
|
||||||
|
.reg.s32 %r1
|
||||||
|
ld.param.b16 %r1, [param_1];
|
||||||
|
```
|
||||||
|
* Surprise, surprise, there are two kinds of implicit conversions at play in the example above:
|
||||||
|
* "Relaxed type-checking rules": this is the conversion of b16 operation type to s32 dst register
|
||||||
|
* Undocumented type coercion when dereferencing param_1. The PTX behaviour is to coerce **every** type. It's something like `[param_1] = *(b16*)param_1`
|
31
ptx/tools/implicit_ld_dst.py
Normal file
31
ptx/tools/implicit_ld_dst.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
# PTX scalar types tried both as the `ld` instruction type and as the
# destination register type.
types = ["b8", "b16", "b32", "b64", "u8", "u16", "u32", "u64", "s8", "s16", "s32", "s64", "f32", "f64"]


def build_ptx(op_type, output_type):
    """Return a minimal PTX kernel probing one (op_type, output_type) pair.

    The kernel declares a `.param` of `op_type`, a register of
    `output_type`, and loads the parameter into the register with
    `ld.param.<op_type>`.
    """
    return f"""
.version 6.5
.target sm_30
.address_size 64
.visible .entry VecAdd_kernel(
    .param .{op_type} input
)
{{
    .reg.{output_type} r1;
    ld.param.{op_type} r1, [input];
    ret;
}}
"""


def ptxas_accepts(ptx_source):
    """Return True when `ptxas` exits with status 0 for `ptx_source`.

    The source is written to a file inside a fresh temporary directory
    that is removed once ptxas has finished.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        f_name = os.path.join(tmp_dir, 'ptx')
        out_name = os.path.join(tmp_dir, 'out')
        with open(f_name, 'w') as f:
            f.write(ptx_source)
        # Pass an argument list: without shell=True a single command string
        # is treated as the program name on POSIX and the call fails with
        # FileNotFoundError. A list also survives spaces in the temp path.
        result = subprocess.run(["ptxas", f_name, "-o", out_name], capture_output=True)
    return result.returncode == 0


def main():
    """Print every (op_type, output_type) pair, prefixing rejected ones with [INVALID]."""
    for op_type in types:
        for output_type in types:
            if ptxas_accepts(build_ptx(op_type, output_type)):
                print(f"{op_type} {output_type}")
            else:
                print(f"[INVALID] {op_type} {output_type}")


if __name__ == "__main__":
    main()
|
31
ptx/tools/implicit_ld_src.py
Normal file
31
ptx/tools/implicit_ld_src.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
# PTX scalar types tried both as the declared `.param` type and as the
# `ld` instruction / register type.
types = ["b8", "b16", "b32", "b64", "u8", "u16", "u32", "u64", "s8", "s16", "s32", "s64", "f32", "f64"]


def build_ptx(input_type, op_type):
    """Return a minimal PTX kernel probing one (input_type, op_type) pair.

    The kernel declares a `.param` of `input_type`, a register of
    `op_type`, and loads the parameter into the register with
    `ld.param.<op_type>`.
    """
    return f"""
.version 6.5
.target sm_30
.address_size 64
.visible .entry VecAdd_kernel(
    .param .{input_type} input
)
{{
    .reg.{op_type} r1;
    ld.param.{op_type} r1, [input];
    ret;
}}
"""


def ptxas_accepts(ptx_source):
    """Return True when `ptxas` exits with status 0 for `ptx_source`.

    The source is written to a file inside a fresh temporary directory
    that is removed once ptxas has finished.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        f_name = os.path.join(tmp_dir, 'ptx')
        out_name = os.path.join(tmp_dir, 'out')
        with open(f_name, 'w') as f:
            f.write(ptx_source)
        # Pass an argument list: without shell=True a single command string
        # is treated as the program name on POSIX and the call fails with
        # FileNotFoundError. Output is captured so ptxas diagnostics do not
        # interleave with the result table printed by main().
        result = subprocess.run(["ptxas", f_name, "-o", out_name], capture_output=True)
    return result.returncode == 0


def main():
    """Print every (op_type, input_type) pair, prefixing rejected ones with [INVALID]."""
    for input_type in types:
        for op_type in types:
            if ptxas_accepts(build_ptx(input_type, op_type)):
                print(f"{op_type} {input_type}")
            else:
                print(f"[INVALID] {op_type} {input_type}")


if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user