|
|
|
@ -1,4 +1,4 @@
|
|
|
|
|
/* auto-generated on 2024-01-29 10:40:15 -0500. Do not edit! */
|
|
|
|
|
/* auto-generated on 2024-05-07 22:33:11 -0400. Do not edit! */
|
|
|
|
|
/* begin file include/simdutf.h */
|
|
|
|
|
#ifndef SIMDUTF_H
|
|
|
|
|
#define SIMDUTF_H
|
|
|
|
@ -130,9 +130,9 @@
|
|
|
|
|
#include <iso646.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if defined(__x86_64__) || defined(_M_AMD64)
|
|
|
|
|
#if (defined(__x86_64__) || defined(_M_AMD64)) && !defined(_M_ARM64EC)
|
|
|
|
|
#define SIMDUTF_IS_X86_64 1
|
|
|
|
|
#elif defined(__aarch64__) || defined(_M_ARM64)
|
|
|
|
|
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
|
|
|
|
#define SIMDUTF_IS_ARM64 1
|
|
|
|
|
#elif defined(__PPC64__) || defined(_M_PPC64)
|
|
|
|
|
//#define SIMDUTF_IS_PPC64 1
|
|
|
|
@ -142,6 +142,30 @@
|
|
|
|
|
// s390 IBM system. Big endian.
|
|
|
|
|
#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
|
|
|
|
|
// RISC-V 64-bit
|
|
|
|
|
#define SIMDUTF_IS_RISCV64 1
|
|
|
|
|
|
|
|
|
|
#if __clang_major__ >= 19
|
|
|
|
|
// Does the compiler support target regions for RISC-V
|
|
|
|
|
#define SIMDUTF_HAS_RVV_TARGET_REGION 1
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if __riscv_v_intrinsic >= 11000
|
|
|
|
|
#define SIMDUTF_HAS_RVV_INTRINSICS 1
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#define SIMDUTF_HAS_ZVBB_INTRINSICS 0 // there is currently no way to detect this
|
|
|
|
|
|
|
|
|
|
#if SIMDUTF_HAS_RVV_INTRINSICS && __riscv_vector && __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64
|
|
|
|
|
// RISC-V V extension
|
|
|
|
|
#define SIMDUTF_IS_RVV 1
|
|
|
|
|
#if SIMDUTF_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000
|
|
|
|
|
// RISC-V Vector Basic Bit-manipulation
|
|
|
|
|
#define SIMDUTF_IS_ZVBB 1
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#elif defined(__loongarch_lp64)
|
|
|
|
|
// LoongArch 64-bit
|
|
|
|
|
#else
|
|
|
|
|
// The simdutf library is designed
|
|
|
|
|
// for 64-bit processors and it seems that you are not
|
|
|
|
@ -540,6 +564,9 @@ enum error_code {
|
|
|
|
|
SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR
|
|
|
|
|
// a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR
|
|
|
|
|
// there must be no surrogate at all (Latin1)
|
|
|
|
|
INVALID_BASE64_CHARACTER, // Found a character that cannot be part of a valid base64 string.
|
|
|
|
|
BASE64_INPUT_REMAINDER, // The base64 input terminates with a single character, excluding padding (=).
|
|
|
|
|
OUTPUT_BUFFER_TOO_SMALL, // The provided buffer is too small.
|
|
|
|
|
OTHER // Not related to validation/transcoding.
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
@ -547,9 +574,9 @@ struct result {
|
|
|
|
|
error_code error;
|
|
|
|
|
size_t count; // In case of error, indicates the position of the error. In case of success, indicates the number of code units validated/written.
|
|
|
|
|
|
|
|
|
|
simdutf_really_inline result();
|
|
|
|
|
simdutf_really_inline result() : error{error_code::SUCCESS}, count{0} {}
|
|
|
|
|
|
|
|
|
|
simdutf_really_inline result(error_code, size_t);
|
|
|
|
|
simdutf_really_inline result(error_code _err, size_t _pos) : error{_err}, count{_pos} {}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
@ -567,22 +594,22 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
|
|
|
|
|
#define SIMDUTF_SIMDUTF_VERSION_H
|
|
|
|
|
|
|
|
|
|
/** The version of simdutf being used (major.minor.revision) */
|
|
|
|
|
#define SIMDUTF_VERSION "4.0.9"
|
|
|
|
|
#define SIMDUTF_VERSION "5.2.8"
|
|
|
|
|
|
|
|
|
|
namespace simdutf {
|
|
|
|
|
enum {
|
|
|
|
|
/**
|
|
|
|
|
* The major version (MAJOR.minor.revision) of simdutf being used.
|
|
|
|
|
*/
|
|
|
|
|
SIMDUTF_VERSION_MAJOR = 4,
|
|
|
|
|
SIMDUTF_VERSION_MAJOR = 5,
|
|
|
|
|
/**
|
|
|
|
|
* The minor version (major.MINOR.revision) of simdutf being used.
|
|
|
|
|
*/
|
|
|
|
|
SIMDUTF_VERSION_MINOR = 0,
|
|
|
|
|
SIMDUTF_VERSION_MINOR = 2,
|
|
|
|
|
/**
|
|
|
|
|
* The revision (major.minor.REVISION) of simdutf being used.
|
|
|
|
|
*/
|
|
|
|
|
SIMDUTF_VERSION_REVISION = 9
|
|
|
|
|
SIMDUTF_VERSION_REVISION = 8
|
|
|
|
|
};
|
|
|
|
|
} // namespace simdutf
|
|
|
|
|
|
|
|
|
@ -654,6 +681,18 @@ POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
#include <cpuid.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// RISC-V ISA detection utilities
|
|
|
|
|
#if SIMDUTF_IS_RISCV64 && defined(__linux__)
|
|
|
|
|
#include <unistd.h> // for syscall
|
|
|
|
|
// We define these ourselves, for backwards compatibility
|
|
|
|
|
struct simdutf_riscv_hwprobe { int64_t key; uint64_t value; };
|
|
|
|
|
#define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)
|
|
|
|
|
#define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4
|
|
|
|
|
#define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)
|
|
|
|
|
#define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)
|
|
|
|
|
#endif // SIMDUTF_IS_RISCV64 && defined(__linux__)
|
|
|
|
|
|
|
|
|
|
namespace simdutf {
|
|
|
|
|
namespace internal {
|
|
|
|
|
|
|
|
|
@ -675,7 +714,9 @@ enum instruction_set {
|
|
|
|
|
AVX512BW = 0x4000,
|
|
|
|
|
AVX512VL = 0x8000,
|
|
|
|
|
AVX512VBMI2 = 0x10000,
|
|
|
|
|
AVX512VPOPCNTDQ = 0x2000
|
|
|
|
|
AVX512VPOPCNTDQ = 0x2000,
|
|
|
|
|
RVV = 0x4000,
|
|
|
|
|
ZVBB = 0x8000,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#if defined(__PPC64__)
|
|
|
|
@ -684,7 +725,35 @@ static inline uint32_t detect_supported_architectures() {
|
|
|
|
|
return instruction_set::ALTIVEC;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#elif defined(__aarch64__) || defined(_M_ARM64)
|
|
|
|
|
#elif SIMDUTF_IS_RISCV64
|
|
|
|
|
|
|
|
|
|
static inline uint32_t detect_supported_architectures() {
|
|
|
|
|
uint32_t host_isa = instruction_set::DEFAULT;
|
|
|
|
|
#if SIMDUTF_IS_RVV
|
|
|
|
|
host_isa |= instruction_set::RVV;
|
|
|
|
|
#endif
|
|
|
|
|
#if SIMDUTF_IS_ZVBB
|
|
|
|
|
host_isa |= instruction_set::ZVBB;
|
|
|
|
|
#endif
|
|
|
|
|
#if defined(__linux__)
|
|
|
|
|
simdutf_riscv_hwprobe probes[] = { { SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0 } };
|
|
|
|
|
long ret = simdutf_riscv_hwprobe(&probes, sizeof probes/sizeof *probes, 0, nullptr, 0);
|
|
|
|
|
if (ret == 0) {
|
|
|
|
|
uint64_t extensions = probes[0].value;
|
|
|
|
|
if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)
|
|
|
|
|
host_isa |= instruction_set::RVV;
|
|
|
|
|
if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
|
|
|
|
|
host_isa |= instruction_set::ZVBB;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
#if defined(RUN_IN_SPIKE_SIMULATOR)
|
|
|
|
|
// Proxy Kernel does not implement yet hwprobe syscall
|
|
|
|
|
host_isa |= instruction_set::RVV;
|
|
|
|
|
#endif
|
|
|
|
|
return host_isa;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
|
|
|
|
|
|
|
|
|
static inline uint32_t detect_supported_architectures() {
|
|
|
|
|
return instruction_set::NEON;
|
|
|
|
@ -1106,7 +1175,7 @@ simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_
|
|
|
|
|
* @param input the UTF-8 string to convert
|
|
|
|
|
* @param length the length of the string in bytes
|
|
|
|
|
* @param latin1_output the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return the number of written char; 0 if the input was not valid UTF-8 string
|
|
|
|
|
* @return the number of written char; 0 if the input was not valid UTF-8 string or if it cannot be represented as Latin1
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
|
|
|
|
|
|
|
|
|
@ -1163,6 +1232,8 @@ simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t le
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert possibly broken UTF-8 string into latin1 string with errors.
|
|
|
|
|
* If the string cannot be represented as Latin1, an error
|
|
|
|
|
* code is returned.
|
|
|
|
|
*
|
|
|
|
|
* During the conversion also validation of the input string is done.
|
|
|
|
|
* This function is suitable to work with inputs from untrusted sources.
|
|
|
|
@ -1382,12 +1453,14 @@ simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t
|
|
|
|
|
* @param input the UTF-16 string to convert
|
|
|
|
|
* @param length the length of the string in 2-byte code units (char16_t)
|
|
|
|
|
* @param latin1_buffer the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-16LE string
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-16 string or if it cannot be represented as Latin1
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t convert_utf16_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert possibly broken UTF-16LE string into Latin1 string.
|
|
|
|
|
* If the string cannot be represented as Latin1, an error
|
|
|
|
|
* is returned.
|
|
|
|
|
*
|
|
|
|
|
* During the conversion also validation of the input string is done.
|
|
|
|
|
* This function is suitable to work with inputs from untrusted sources.
|
|
|
|
@ -1397,7 +1470,7 @@ simdutf_warn_unused size_t convert_utf16_to_latin1(const char16_t * input, size_
|
|
|
|
|
* @param input the UTF-16LE string to convert
|
|
|
|
|
* @param length the length of the string in 2-byte code units (char16_t)
|
|
|
|
|
* @param latin1_buffer the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-16LE string
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-16LE string or if it cannot be represented as Latin1
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
|
|
|
|
|
|
|
|
|
@ -1412,7 +1485,7 @@ simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, siz
|
|
|
|
|
* @param input the UTF-16BE string to convert
|
|
|
|
|
* @param length the length of the string in 2-byte code units (char16_t)
|
|
|
|
|
* @param latin1_buffer the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-16BE string or if it cannot be represented as Latin1
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
|
|
|
|
|
|
|
|
|
@ -1477,6 +1550,8 @@ simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert possibly broken UTF-16BE string into Latin1 string.
|
|
|
|
|
* If the string cannot be represented as Latin1, an error
|
|
|
|
|
* is returned.
|
|
|
|
|
*
|
|
|
|
|
* During the conversion also validation of the input string is done.
|
|
|
|
|
* This function is suitable to work with inputs from untrusted sources.
|
|
|
|
@ -1887,13 +1962,14 @@ simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size
|
|
|
|
|
* @param input the UTF-32 string to convert
|
|
|
|
|
* @param length the length of the string in 4-byte code units (char32_t)
|
|
|
|
|
* @param latin1_buffer the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-32 string
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-32 string or if it cannot be represented as Latin1
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert possibly broken UTF-32 string into Latin1 string and stop on error.
|
|
|
|
|
* If the string cannot be represented as Latin1, an error is returned.
|
|
|
|
|
*
|
|
|
|
|
* During the conversion also validation of the input string is done.
|
|
|
|
|
* This function is suitable to work with inputs from untrusted sources.
|
|
|
|
@ -2222,6 +2298,150 @@ simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t le
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length);
|
|
|
|
|
|
|
|
|
|
// base64_options are used to specify the base64 encoding options.
|
|
|
|
|
using base64_options = uint64_t;
|
|
|
|
|
enum : base64_options {
|
|
|
|
|
base64_default = 0, /* standard base64 format */
|
|
|
|
|
base64_url = 1 /* base64url format*/
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Provide the maximal binary length in bytes given the base64 input.
|
|
|
|
|
* In general, if the input contains ASCII spaces, the result will be less than
|
|
|
|
|
* the maximum length.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 input to process
|
|
|
|
|
* @param length the length of the base64 input in bytes
|
|
|
|
|
* @return maximum number of binary bytes
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char * input, size_t length) noexcept;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Provide the maximal binary length in bytes given the base64 input.
|
|
|
|
|
* In general, if the input contains ASCII spaces, the result will be less than
|
|
|
|
|
* the maximum length.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 input to process, in ASCII stored as 16-bit units
|
|
|
|
|
* @param length the length of the base64 input in 16-bit units
|
|
|
|
|
* @return maximal number of binary bytes
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t * input, size_t length) noexcept;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a base64 input to a binary ouput.
|
|
|
|
|
*
|
|
|
|
|
* This function follows the WHATWG forgiving-base64 format, which means that it will
|
|
|
|
|
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
|
|
|
|
|
* equal signs at the end) or an unpadded input (without any equal signs at the end).
|
|
|
|
|
*
|
|
|
|
|
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
|
|
|
|
|
*
|
|
|
|
|
* This function will fail in case of invalid input. There are two possible reasons for
|
|
|
|
|
* failure: the input contains a number of base64 characters that when divided by 4, leaves
|
|
|
|
|
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
|
|
|
|
|
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
|
|
|
|
|
*
|
|
|
|
|
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
|
|
|
|
|
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
|
|
|
|
|
* r.count contains the number of bytes decoded.
|
|
|
|
|
*
|
|
|
|
|
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
|
|
|
|
|
* If you fail to provide that much space, the function may cause a buffer overflow.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 string to process
|
|
|
|
|
* @param length the length of the string in bytes
|
|
|
|
|
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
|
|
|
|
|
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
|
|
|
|
|
* @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in bytes) if any, or the number of bytes written if successful.
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused result base64_to_binary(const char * input, size_t length, char* output, base64_options options = base64_default) noexcept;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Provide the base64 length in bytes given the length of a binary input.
|
|
|
|
|
*
|
|
|
|
|
* @param length the length of the input in bytes
|
|
|
|
|
* @return number of base64 bytes
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options = base64_default) noexcept;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a binary input to a base64 ouput. The output is always padded with equal signs so that it is
|
|
|
|
|
* a multiple of 4 bytes long.
|
|
|
|
|
*
|
|
|
|
|
* This function always succeeds.
|
|
|
|
|
*
|
|
|
|
|
* @param input the binary to process
|
|
|
|
|
* @param length the length of the input in bytes
|
|
|
|
|
* @param output the pointer to buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
|
|
|
|
|
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
|
|
|
|
|
* @return number of written bytes, will be equal to base64_length_from_binary(length, options)
|
|
|
|
|
*/
|
|
|
|
|
size_t binary_to_base64(const char * input, size_t length, char* output, base64_options options = base64_default) noexcept;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a base64 input to a binary ouput.
|
|
|
|
|
*
|
|
|
|
|
* This function follows the WHATWG forgiving-base64 format, which means that it will
|
|
|
|
|
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
|
|
|
|
|
* equal signs at the end) or an unpadded input (without any equal signs at the end).
|
|
|
|
|
*
|
|
|
|
|
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
|
|
|
|
|
*
|
|
|
|
|
* This function will fail in case of invalid input. There are two possible reasons for
|
|
|
|
|
* failure: the input contains a number of base64 characters that when divided by 4, leaves
|
|
|
|
|
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
|
|
|
|
|
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
|
|
|
|
|
*
|
|
|
|
|
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
|
|
|
|
|
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
|
|
|
|
|
* r.count contains the number of bytes decoded.
|
|
|
|
|
*
|
|
|
|
|
* You should call this function with a buffer that is at least maximal_binary_length_from_utf6_base64(input, length) bytes long.
|
|
|
|
|
* If you fail to provide that much space, the function may cause a buffer overflow.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 string to process, in ASCII stored as 16-bit units
|
|
|
|
|
* @param length the length of the string in 16-bit units
|
|
|
|
|
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
|
|
|
|
|
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
|
|
|
|
|
* @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of bytes written if successful.
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused result base64_to_binary(const char16_t * input, size_t length, char* output, base64_options options = base64_default) noexcept;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a base64 input to a binary ouput.
|
|
|
|
|
*
|
|
|
|
|
* This function follows the WHATWG forgiving-base64 format, which means that it will
|
|
|
|
|
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
|
|
|
|
|
* equal signs at the end) or an unpadded input (without any equal signs at the end).
|
|
|
|
|
*
|
|
|
|
|
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
|
|
|
|
|
*
|
|
|
|
|
* This function will fail in case of invalid input. There are three possible reasons for
|
|
|
|
|
* failure: the input contains a number of base64 characters that when divided by 4, leaves
|
|
|
|
|
* a single remainder character (BASE64_INPUT_REMAINDER), the input contains a character
|
|
|
|
|
* that is not a valid base64 character (INVALID_BASE64_CHARACTER), or the output buffer
|
|
|
|
|
* is too small (OUTPUT_BUFFER_TOO_SMALL).
|
|
|
|
|
*
|
|
|
|
|
* When OUTPUT_BUFFER_TOO_SMALL, we return both the number of bytes written
|
|
|
|
|
* and the number of units processed, see description of the parameters and returned value.
|
|
|
|
|
*
|
|
|
|
|
* When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the input
|
|
|
|
|
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
|
|
|
|
|
* r.count contains the number of bytes decoded.
|
|
|
|
|
*
|
|
|
|
|
* The INVALID_BASE64_CHARACTER cases are considered fatal and you are expected to discard
|
|
|
|
|
* the output.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 string to process, in ASCII stored as 8-bit or 16-bit units
|
|
|
|
|
* @param length the length of the string in 8-bit or 16-bit units.
|
|
|
|
|
* @param output the pointer to buffer that can hold the conversion result.
|
|
|
|
|
* @param outlen the number of bytes that can be written in the output buffer. Upon return, it is modified to reflect how many bytes were written.
|
|
|
|
|
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
|
|
|
|
|
* @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of units processed if successful.
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused result base64_to_binary_safe(const char * input, size_t length, char* output, size_t& outlen, base64_options options = base64_default) noexcept;
|
|
|
|
|
simdutf_warn_unused result base64_to_binary_safe(const char16_t * input, size_t length, char* output, size_t& outlen, base64_options options = base64_default) noexcept;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* An implementation of simdutf for a particular CPU architecture.
|
|
|
|
|
*
|
|
|
|
@ -2239,7 +2459,7 @@ public:
|
|
|
|
|
*
|
|
|
|
|
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
|
|
|
|
|
*/
|
|
|
|
|
virtual const std::string &name() const { return _name; }
|
|
|
|
|
virtual std::string name() const { return std::string(_name); }
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* The description of this implementation.
|
|
|
|
@ -2249,7 +2469,7 @@ public:
|
|
|
|
|
*
|
|
|
|
|
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
|
|
|
|
|
*/
|
|
|
|
|
virtual const std::string &description() const { return _description; }
|
|
|
|
|
virtual std::string description() const { return std::string(_description); }
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* The instruction sets this implementation is compiled against
|
|
|
|
@ -2473,12 +2693,14 @@ public:
|
|
|
|
|
* @param input the UTF-8 string to convert
|
|
|
|
|
* @param length the length of the string in bytes
|
|
|
|
|
* @param latin1_output the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return the number of written char; 0 if the input was not valid UTF-8 string
|
|
|
|
|
* @return the number of written char; 0 if the input was not valid UTF-8 string or if it cannot be represented as Latin1
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert possibly broken UTF-8 string into latin1 string with errors
|
|
|
|
|
* Convert possibly broken UTF-8 string into latin1 string with errors.
|
|
|
|
|
* If the string cannot be represented as Latin1, an error
|
|
|
|
|
* code is returned.
|
|
|
|
|
*
|
|
|
|
|
* During the conversion also validation of the input string is done.
|
|
|
|
|
* This function is suitable to work with inputs from untrusted sources.
|
|
|
|
@ -2654,7 +2876,7 @@ public:
|
|
|
|
|
* @param input the UTF-16LE string to convert
|
|
|
|
|
* @param length the length of the string in 2-byte code units (char16_t)
|
|
|
|
|
* @param latin1_buffer the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-16LE string
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-16LE string or if it cannot be represented as Latin1
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
|
|
|
|
|
|
|
|
|
@ -2669,12 +2891,14 @@ public:
|
|
|
|
|
* @param input the UTF-16BE string to convert
|
|
|
|
|
* @param length the length of the string in 2-byte code units (char16_t)
|
|
|
|
|
* @param latin1_buffer the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
|
|
|
|
|
* @return number of written code units; 0 if input is not a valid UTF-16BE string or if it cannot be represented as Latin1
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert possibly broken UTF-16LE string into Latin1 string.
|
|
|
|
|
* If the string cannot be represented as Latin1, an error
|
|
|
|
|
* is returned.
|
|
|
|
|
*
|
|
|
|
|
* During the conversion also validation of the input string is done.
|
|
|
|
|
* This function is suitable to work with inputs from untrusted sources.
|
|
|
|
@ -2689,6 +2913,8 @@ public:
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert possibly broken UTF-16BE string into Latin1 string.
|
|
|
|
|
* If the string cannot be represented as Latin1, an error
|
|
|
|
|
* is returned.
|
|
|
|
|
*
|
|
|
|
|
* During the conversion also validation of the input string is done.
|
|
|
|
|
* This function is suitable to work with inputs from untrusted sources.
|
|
|
|
@ -2949,6 +3175,7 @@ public:
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert possibly broken UTF-32 string into Latin1 string and stop on error.
|
|
|
|
|
* If the string cannot be represented as Latin1, an error is returned.
|
|
|
|
|
*
|
|
|
|
|
* During the conversion also validation of the input string is done.
|
|
|
|
|
* This function is suitable to work with inputs from untrusted sources.
|
|
|
|
@ -2960,7 +3187,6 @@ public:
|
|
|
|
|
* @param latin1_buffer the pointer to buffer that can hold conversion result
|
|
|
|
|
* @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -3282,13 +3508,107 @@ public:
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual size_t count_utf8(const char * input, size_t length) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Provide the maximal binary length in bytes given the base64 input.
|
|
|
|
|
* In general, if the input contains ASCII spaces, the result will be less than
|
|
|
|
|
* the maximum length.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 input to process
|
|
|
|
|
* @param length the length of the base64 input in bytes
|
|
|
|
|
* @return maximal number of binary bytes
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual size_t maximal_binary_length_from_base64(const char * input, size_t length) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Provide the maximal binary length in bytes given the base64 input.
|
|
|
|
|
* In general, if the input contains ASCII spaces, the result will be less than
|
|
|
|
|
* the maximum length.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 input to process, in ASCII stored as 16-bit units
|
|
|
|
|
* @param length the length of the base64 input in 16-bit units
|
|
|
|
|
* @return maximal number of binary bytes
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual size_t maximal_binary_length_from_base64(const char16_t * input, size_t length) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a base64 input to a binary ouput.
|
|
|
|
|
*
|
|
|
|
|
* This function follows the WHATWG forgiving-base64 format, which means that it will
|
|
|
|
|
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
|
|
|
|
|
* equal signs at the end) or an unpadded input (without any equal signs at the end).
|
|
|
|
|
*
|
|
|
|
|
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
|
|
|
|
|
*
|
|
|
|
|
* This function will fail in case of invalid input. There are two possible reasons for
|
|
|
|
|
* failure: the input contains a number of base64 characters that when divided by 4, leaves
|
|
|
|
|
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
|
|
|
|
|
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
|
|
|
|
|
*
|
|
|
|
|
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
|
|
|
|
|
* If you fail to provide that much space, the function may cause a buffer overflow.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 string to process
|
|
|
|
|
* @param length the length of the string in bytes
|
|
|
|
|
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
|
|
|
|
|
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
|
|
|
|
|
* @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in bytes) if any, or the number of bytes written if successful.
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual result base64_to_binary(const char * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a base64 input to a binary ouput.
|
|
|
|
|
*
|
|
|
|
|
* This function follows the WHATWG forgiving-base64 format, which means that it will
|
|
|
|
|
* ignore any ASCII spaces in the input. You may provide a padded input (with one or two
|
|
|
|
|
* equal signs at the end) or an unpadded input (without any equal signs at the end).
|
|
|
|
|
*
|
|
|
|
|
* See https://infra.spec.whatwg.org/#forgiving-base64-decode
|
|
|
|
|
*
|
|
|
|
|
* This function will fail in case of invalid input. There are two possible reasons for
|
|
|
|
|
* failure: the input contains a number of base64 characters that when divided by 4, leaves
|
|
|
|
|
* a single remainder character (BASE64_INPUT_REMAINDER), or the input contains a character
|
|
|
|
|
* that is not a valid base64 character (INVALID_BASE64_CHARACTER).
|
|
|
|
|
*
|
|
|
|
|
* You should call this function with a buffer that is at least maximal_binary_length_from_utf6_base64(input, length) bytes long.
|
|
|
|
|
* If you fail to provide that much space, the function may cause a buffer overflow.
|
|
|
|
|
*
|
|
|
|
|
* @param input the base64 string to process, in ASCII stored as 16-bit units
|
|
|
|
|
* @param length the length of the string in 16-bit units
|
|
|
|
|
* @param output the pointer to buffer that can hold the conversion result (should be at least maximal_binary_length_from_base64(input, length) bytes long).
|
|
|
|
|
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
|
|
|
|
|
* @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and position of the INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number of bytes written if successful.
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual result base64_to_binary(const char16_t * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Provide the base64 length in bytes given the length of a binary input.
|
|
|
|
|
*
|
|
|
|
|
* @param length the length of the input in bytes
|
|
|
|
|
* @parem options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
|
|
|
|
|
* @return number of base64 bytes
|
|
|
|
|
*/
|
|
|
|
|
simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length, base64_options options = base64_default) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Convert a binary input to a base64 ouput. The output is always padded with equal signs so that it is
|
|
|
|
|
* a multiple of 4 bytes long.
|
|
|
|
|
*
|
|
|
|
|
* This function always succeeds.
|
|
|
|
|
*
|
|
|
|
|
* @param input the binary to process
|
|
|
|
|
* @param length the length of the input in bytes
|
|
|
|
|
* @param output the pointer to buffer that can hold the conversion result (should be at least base64_length_from_binary(length) bytes long)
|
|
|
|
|
* @param options the base64 options to use, can be base64_default or base64_url, is base64_default by default.
|
|
|
|
|
* @return number of written bytes, will be equal to base64_length_from_binary(length, options)
|
|
|
|
|
*/
|
|
|
|
|
virtual size_t binary_to_base64(const char * input, size_t length, char* output, base64_options options = base64_default) const noexcept = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/** @private Construct an implementation with the given name and description. For subclasses. */
|
|
|
|
|
simdutf_really_inline implementation(
|
|
|
|
|
std::string name,
|
|
|
|
|
std::string description,
|
|
|
|
|
const char* name,
|
|
|
|
|
const char* description,
|
|
|
|
|
uint32_t required_instruction_sets
|
|
|
|
|
) :
|
|
|
|
|
_name(name),
|
|
|
|
@ -3296,18 +3616,18 @@ protected:
|
|
|
|
|
_required_instruction_sets(required_instruction_sets)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
virtual ~implementation()=default;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
~implementation() = default;
|
|
|
|
|
private:
|
|
|
|
|
/**
|
|
|
|
|
* The name of this implementation.
|
|
|
|
|
*/
|
|
|
|
|
const std::string _name;
|
|
|
|
|
const char* _name;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* The description of this implementation.
|
|
|
|
|
*/
|
|
|
|
|
const std::string _description;
|
|
|
|
|
const char* _description;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Instruction sets required for this implementation.
|
|
|
|
|