#include <cstdint>      // std::uint64_t
#include <iostream>
#include <x86intrin.h>
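// Uses SSE3/SSSE3/SSE4.1 intrinsics (_mm_lddqu_si128, _mm_maddubs_epi16, _mm_packus_epi32,
// _mm_extract_epi32), so on GCC/Clang this needs e.g. -msse4.1 or -march=native to build.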
inline std::uint64_t parse_16_chars(const char* numbers) noexcept
{
// Set up the multiplier constants (_mm_set_* lists elements from most significant to least significant)
const auto mul_1_10 = _mm_set_epi8 (
1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10
);
const auto mul_1_100 = _mm_set_epi16 (
1, 100, 1, 100, 1, 100, 1, 100
);
const auto mul_1_10000 = _mm_set_epi16 (
0, 0, 0, 0, 1, 10000, 1, 10000
);
// Load 16 unaligned bytes; the caller must guarantee 16 readable digit characters
auto chunk = _mm_lddqu_si128(
reinterpret_cast<const __m128i*>(numbers)
);
const auto ASCII_ZEROS = _mm_set1_epi8('0');
// Convert ASCII digit characters to their numeric values (saturating unsigned subtract)
chunk = _mm_subs_epu8(chunk, ASCII_ZEROS);
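// e.g. after this step the bytes of "1234567890123456" are 1,2,3,...,9,0,1,...,6
// (byte 0 holds the first character of the string)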
// _mm_maddubs_epi16: multiply unsigned 8-bit integers from chunk by signed 8-bit integers
// from mul_1_10, then add adjacent pairs into 16-bit integers.
// Each 16-bit word now holds a two-digit value (d_hi * 10 + d_lo).
chunk = _mm_maddubs_epi16(chunk, mul_1_10);
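// e.g. for "1234567890123456" the eight 16-bit words (low to high) are now
// 12, 34, 56, 78, 90, 12, 34, 56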
// _mm_madd_epi16: multiply packed signed 16-bit integers into 32-bit products and add
// adjacent pairs. Each 32-bit lane now holds a four-digit value.
chunk = _mm_madd_epi16(chunk, mul_1_100);
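// e.g. the four 32-bit lanes (low to high) are now 1234, 5678, 9012, 3456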
/*
_mm_packus_epi32(a, b) narrows eight 32-bit lanes into eight 16-bit words with
unsigned saturation:
  dst words 0..3 := SaturateU16(a dwords 0..3)
  dst words 4..7 := SaturateU16(b dwords 0..3)
i.e. [A0, A1, A2, A3] and [B0, B1, B2, B3] become the 16-bit words
     [A0, A1, A2, A3, B0, B1, B2, B3]  (low word first).
*/
// Narrow the 32-bit lanes back to 16-bit words so the next _mm_madd_epi16 can operate on them.
// The second operand only fills words 4..7, which are multiplied by 0 in the next step,
// so it doesn't matter what we pass here (mul_1_100 is simply reused).
chunk = _mm_packus_epi32(chunk, mul_1_100);
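// e.g. the same four values 1234, 5678, 9012, 3456 now sit in 16-bit words 0..3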
// _mm_madd_epi16 again: combine each pair of four-digit words into a 32-bit lane.
// Lane 0 now holds the first eight digits, lane 1 the last eight digits,
// and lanes 2..3 are zero because their multipliers are 0.
chunk = _mm_madd_epi16(chunk, mul_1_10000);
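// e.g. the 32-bit lanes (low to high) are now 12345678, 90123456, 0, 0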
// Combine: (first eight digits) * 10^8 + (last eight digits).
// _mm_extract_epi32 is a relatively heavy instruction -- should we just pull out the
// low 64 bits once and use ordinary shifts/masks to split them instead?
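// A possible alternative (untested sketch, not from the sources I followed):
//   const auto both = static_cast<std::uint64_t>(_mm_cvtsi128_si64(chunk));
//   return (both & 0xffffffff) * 100000000 + (both >> 32);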
return static_cast<std::uint64_t>(_mm_extract_epi32(chunk, 0)) * 100000000
     + static_cast<std::uint64_t>(_mm_extract_epi32(chunk, 1));
}
int main () {
// The parser reads exactly 16 bytes, so the input must be exactly 16 digit characters
// (shorter values can be left-padded with '0')
const char* numbers = "0000002147483647";
const auto result = parse_16_chars(numbers);
std::cout << result << std::endl;
}
I have implemented this parser for 16-digit integers based on information gathered from online resources, and it works. I am now looking for guidance on generalizing it to integers of varying lengths, specifically anything longer than 8 digits.
Could someone suggest a good approach for adapting the existing parser to variable digit counts? I am particularly interested in keeping it fast.
The input stream may contain integers of various lengths, and I would like the parser to handle that variability efficiently. Note that this is just for learning purposes.
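For reference, the only generalization I have come up with so far is to copy the digits into a '0'-filled 16-byte buffer and reuse parse_16_chars. A rough sketch is below; the helper name and the "len <= 16, digits only" assumptions are mine, and it additionally needs <cstring>. I am hoping there is a faster approach that avoids the extra copy.
std::uint64_t parse_up_to_16_chars(const char* digits, std::size_t len) noexcept
{
// Assumes 1 <= len <= 16 and that digits[0..len) are ASCII digit characters
char buf[16] = {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'};
std::memcpy(buf + (16 - len), digits, len); // right-align, left-pad with '0'
return parse_16_chars(buf); // leading '0' bytes contribute nothing to the value
}
With this I can call, for example, parse_up_to_16_chars("12345", 5), but it costs a memcpy per number, which is what I would like to avoid.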
Thank you in advance for your assistance!