◐ Shell
clean mode source ↗

deps: update simdjson to 3.12.2 · nodejs/node@03cd792

1-

/* auto-generated on 2025-01-27 20:34:35 -0500. Do not edit! */

1+

/* auto-generated on 2025-02-14 16:11:36 -0500. Do not edit! */

22

/* including simdjson.h: */

33

/* begin file simdjson.h */

44

#ifndef SIMDJSON_H

@@ -2437,7 +2437,7 @@ namespace std {

24372437

#define SIMDJSON_SIMDJSON_VERSION_H

2438243824392439

/** The version of simdjson being used (major.minor.revision) */

2440-

#define SIMDJSON_VERSION "3.12.0"

2440+

#define SIMDJSON_VERSION "3.12.2"

2441244124422442

namespace simdjson {

24432443

enum {

@@ -2452,7 +2452,7 @@ enum {

24522452

/**

24532453

* The revision (major.minor.REVISION) of simdjson being used.

24542454

*/

2455-

SIMDJSON_VERSION_REVISION = 0

2455+

SIMDJSON_VERSION_REVISION = 2

24562456

};

24572457

} // namespace simdjson

24582458

@@ -17948,14 +17948,18 @@ namespace simd {

17948179481794917949

// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).

1795017950

// Passing a 0 value for mask would be equivalent to writing out every byte to output.

17951-

// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes

17951+

// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes

1795217952

// get written.

1795317953

// Design consideration: it seems like a function with the

1795417954

// signature simd8<L> compress(uint64_t mask) would be

1795517955

// sensible, but the AVX ISA makes this kind of approach difficult.

1795617956

template<typename L>

1795717957

simdjson_inline void compress(uint64_t mask, L * output) const {

17958-

_mm512_mask_compressstoreu_epi8 (output,~mask,*this);

17958+

// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability

17959+

// (AMD Zen4 has terrible performance with it, it is effectively broken)

17960+

// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);

17961+

__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);

17962+

_mm512_storeu_si512(output, compressed); // could use a mask

1795917963

}

17960179641796117965

template<typename L>

@@ -65401,14 +65405,18 @@ namespace simd {

65401654056540265406

// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).

6540365407

// Passing a 0 value for mask would be equivalent to writing out every byte to output.

65404-

// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes

65408+

// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes

6540565409

// get written.

6540665410

// Design consideration: it seems like a function with the

6540765411

// signature simd8<L> compress(uint64_t mask) would be

6540865412

// sensible, but the AVX ISA makes this kind of approach difficult.

6540965413

template<typename L>

6541065414

simdjson_inline void compress(uint64_t mask, L * output) const {

65411-

_mm512_mask_compressstoreu_epi8 (output,~mask,*this);

65415+

// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability

65416+

// (AMD Zen4 has terrible performance with it, it is effectively broken)

65417+

// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);

65418+

__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);

65419+

_mm512_storeu_si512(output, compressed); // could use a mask

6541265420

}

65413654216541465422

template<typename L>