Parsing integers from a string using SIMD

#include <iostream>
#include <cstdint>
#include <x86intrin.h>


inline std::uint64_t parse_16_chars(const char* numbers) noexcept
{
  // Set up multiplier constants. Note that _mm_set_* takes its arguments
  // most-significant lane first, so each multiplier lands in the lower
  // lane of its pair.
  const auto mul_1_10 = _mm_set_epi8 (
        1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10
  );
  const auto mul_1_100 = _mm_set_epi16 (
      1, 100, 1, 100, 1, 100, 1, 100
  );  
  const auto mul_1_10000 = _mm_set_epi16 (
        0, 0, 0, 0, 1, 10000, 1, 10000
  );

  // unaligned 16-byte load of the ASCII characters
  auto chunk = _mm_lddqu_si128(
    reinterpret_cast<const __m128i*>(numbers)
  );

  const auto ASCII_ZEROS = _mm_set1_epi8('0');
  // convert ASCII digit characters to their numeric values:
  // '0'..'9' -> 0..9 (saturating subtract clamps bytes below '0' to 0)
  chunk = _mm_subs_epu8(chunk, ASCII_ZEROS);
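  // running example: "0000002147483647"
  // byte lanes (low to high): {0,0,0,0,0,0,2,1,4,7,4,8,3,6,4,7}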

  // _mm_maddubs_epi16 multiplies the unsigned 8-bit integers in chunk by the
  // signed 8-bit integers in mul_1_10, then horizontally adds adjacent pairs
  // into 16-bit integers: each pair of digits becomes a 2-digit value (0..99).
  chunk = _mm_maddubs_epi16(chunk, mul_1_10);
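  // e.g. 16-bit lanes (low to high): {0, 0, 0, 21, 47, 48, 36, 47}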
  
  // Multiply packed signed 16-bit integers, producing intermediate 32-bit
  // integers, and horizontally add adjacent pairs: each pair of 2-digit
  // values becomes a 4-digit value (0..9999).
  chunk = _mm_madd_epi16(chunk, mul_1_100);
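  // e.g. 32-bit lanes (low to high): {0, 21, 4748, 3647}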


  /*
    _mm_packus_epi32(a, b) packs eight 32-bit integers into eight 16-bit
    integers with unsigned saturation:

      dst[15:0]    := SaturateU16(a[31:0])
      dst[31:16]   := SaturateU16(a[63:32])
      dst[47:32]   := SaturateU16(a[95:64])
      dst[63:48]   := SaturateU16(a[127:96])
      dst[79:64]   := SaturateU16(b[31:0])
      dst[95:80]   := SaturateU16(b[63:32])
      dst[111:96]  := SaturateU16(b[95:64])
      dst[127:112] := SaturateU16(b[127:96])

    i.e. a = [a0, a1, a2, a3] and b = [b0, b1, b2, b3] (32-bit lanes)
    become [a0, a1, a2, a3, b0, b1, b2, b3] (16-bit lanes).
  */

  // Narrow the 32-bit values back to 16 bits so _mm_madd_epi16 can operate
  // on them again. The second argument only fills the upper 64 bits, which
  // the final step never reads, so any register (here mul_1_100) will do.
  chunk = _mm_packus_epi32(chunk, mul_1_100);
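  // e.g. 16-bit lanes (low to high): {0, 21, 4748, 3647, then four
  // don't-care lanes packed from mul_1_100}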

  // Multiply packed signed 16-bit integers, producing intermediate 32-bit
  // integers, and horizontally add adjacent pairs: each pair of 4-digit
  // values becomes an 8-digit value.
  chunk = _mm_madd_epi16(chunk, mul_1_10000);
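  // e.g. 32-bit lanes (low to high): {21, 47483647, 0, 0}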

  // Combine the two halves: (leading 8 digits) * 1e8 + (trailing 8 digits).
  // _mm_extract_epi32 is a heavy instruction -- should we just use a normal
  // bit-shifting approach to extract the two values? (see the note after
  // the code)
  return static_cast<std::uint64_t>(_mm_extract_epi32(chunk, 0)) * 100000000 +
         static_cast<std::uint64_t>(_mm_extract_epi32(chunk, 1));
}

int main () {

  // parse_16_chars always reads 16 bytes, so the input must be exactly
  // 16 characters; left-pad shorter numbers with '0'
  const char* numbers = "0000002147483647";
  const auto result = parse_16_chars(numbers);
  std::cout << result << std::endl;

}
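On the final extraction (the comment before the return): the alternative I was weighing, a sketch I have not benchmarked, moves the low 64 bits out with a single _mm_cvtsi128_si64 and splits them with scalar shifts. The helper name parse_16_chars_tail is just a placeholder; its body would replace the return statement above:

inline std::uint64_t parse_16_chars_tail(const __m128i chunk) noexcept
{
  // one movq instead of two pextrd: the low 32 bits hold the leading
  // 8 digits, the next 32 bits hold the trailing 8 digits
  const auto both = static_cast<std::uint64_t>(_mm_cvtsi128_si64(chunk));
  return (both & 0xFFFFFFFF) * 100000000 + (both >> 32);
}

I would welcome opinions on whether the difference matters in practice.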

I have successfully implemented a parser for 16-digit integers based on information gathered from online resources. However, I am now seeking guidance on generalizing this parser to handle integers of varying lengths, specifically any number of digits greater than 8.

Could someone provide insights or suggest a good approach to make my existing parser adaptable to integers with lengths beyond 8 digits? I am particularly interested in optimizing the method for speed and efficiency.

The input stream may contain integers of various lengths, and I aim to modify my parser to accommodate this variability. Any recommendations or best practices for handling different digit lengths in a fast and efficient manner would be greatly appreciated; my own attempt so far is sketched below. Note that this is just for learning purposes.
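For reference, the direction I have experimented with is to left-pad the digits into a '0'-filled buffer and reuse the fixed-width kernel above. This is only a minimal sketch: it assumes the digit count is already known and is at most 16, it needs <cstring> and <cstddef>, and parse_up_to_16_chars is just my placeholder name:

inline std::uint64_t parse_up_to_16_chars(const char* numbers, std::size_t length) noexcept
{
  // right-align the digits in a '0'-filled buffer; the leading
  // zeros do not change the parsed value
  char padded[16] = {'0', '0', '0', '0', '0', '0', '0', '0',
                     '0', '0', '0', '0', '0', '0', '0', '0'};
  std::memcpy(padded + 16 - length, numbers, length);
  return parse_16_chars(padded);
}

The memcpy presumably costs as much as the parse itself, so I suspect a better answer shifts the loaded vector with a shuffle instead, which is exactly the part I am unsure how to do efficiently.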

Thank you in advance for your assistance!
