◐ Shell
clean mode source ↗

src: use stack allocation for small string encoding · nodejs/node@6151999

@@ -145,12 +145,49 @@ class ExternString: public ResourceType {

145145

size_t length_;

146146

};

147147148+

// Concrete ExternString instantiations: one-byte strings are backed by char
// storage, two-byte strings by uint16_t storage.
using ExternOneByteString =
    ExternString<String::ExternalOneByteStringResource, char>;
using ExternTwoByteString =
    ExternString<String::ExternalStringResource, uint16_t>;

152+153+

template <typename EncodeFn>

154+

static MaybeLocal<Value> EncodeOneByteString(Isolate* isolate,

155+

size_t length,

156+

EncodeFn encode) {

157+

// 512B stack threshold: covers common small outputs (hex SHA-256/512, UUIDs).

158+

// Larger thresholds were benchmarked

159+

MaybeStackBuffer<char, 512> buf(length);

160+

encode(buf.out());

161+

// Copy stack-backed data, but release heap-backed storage to V8.

162+

if (buf.IsAllocated()) {

163+

char* data = buf.out();

164+

buf.Release();

165+

return ExternOneByteString::New(isolate, data, length);

166+

}

167+

return String::NewFromOneByte(isolate,

168+

reinterpret_cast<const uint8_t*>(buf.out()),

169+

v8::NewStringType::kNormal,

170+

static_cast<int>(length));

171+

}

148172149-

typedef ExternString<String::ExternalOneByteStringResource,

150-

char> ExternOneByteString;

151-

typedef ExternString<String::ExternalStringResource,

152-

uint16_t> ExternTwoByteString;

153-173+

template <typename EncodeFn>

174+

static MaybeLocal<Value> EncodeTwoByteString(Isolate* isolate,

175+

size_t char_length,

176+

EncodeFn encode) {

177+

// 256 uint16_t = 512 bytes on the stack, matching the one-byte

178+

MaybeStackBuffer<uint16_t, 256> buf(char_length);

179+

encode(buf.out());

180+

// Copy stack-backed data, but release heap-backed storage to V8.

181+

if (buf.IsAllocated()) {

182+

uint16_t* data = buf.out();

183+

buf.Release();

184+

return ExternTwoByteString::New(isolate, data, char_length);

185+

}

186+

return String::NewFromTwoByte(isolate,

187+

buf.out(),

188+

v8::NewStringType::kNormal,

189+

static_cast<int>(char_length));

190+

}

154191155192

template <>

156193

MaybeLocal<Value> ExternOneByteString::NewExternal(

@@ -527,13 +564,10 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,

527564

buflen = keep_buflen_in_range(buflen);

528565

if (simdutf::validate_ascii_with_errors(buf, buflen).error) {

529566

// The input contains non-ASCII bytes.

530-

char* out = node::UncheckedMalloc(buflen);

531-

if (out == nullptr) {

532-

isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));

533-

return MaybeLocal<Value>();

534-

}

535-

nbytes::ForceAscii(buf, out, buflen);

536-

return ExternOneByteString::New(isolate, out, buflen);

567+568+

return EncodeOneByteString(isolate, buflen, [buf, buflen](char* dst) {

569+

nbytes::ForceAscii(buf, dst, buflen);

570+

});

537571

} else {

538572

return ExternOneByteString::NewFromCopy(isolate, buf, buflen);

539573

}

@@ -557,14 +591,12 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,

557591

isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));

558592

return MaybeLocal<Value>();

559593

}

560-

uint16_t* dst = node::UncheckedMalloc<uint16_t>(u16size);

561-

if (u16size != 0 && dst == nullptr) {

562-

THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate);

563-

return MaybeLocal<Value>();

564-

}

565-

size_t utf16len = simdutf::convert_valid_utf8_to_utf16(

566-

buf, buflen, reinterpret_cast<char16_t*>(dst));

567-

return ExternTwoByteString::New(isolate, dst, utf16len);

594+

return EncodeTwoByteString(

595+

isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {

596+

size_t written = simdutf::convert_valid_utf8_to_utf16(

597+

buf, buflen, reinterpret_cast<char16_t*>(dst));

598+

CHECK_EQ(written, u16size);

599+

});

568600

}

569601570602

val =

@@ -583,77 +615,52 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,

583615

case BASE64: {

584616

buflen = keep_buflen_in_range(buflen);

585617

size_t dlen = simdutf::base64_length_from_binary(buflen);

586-

char* dst = node::UncheckedMalloc(dlen);

587-

if (dst == nullptr) {

588-

isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));

589-

return MaybeLocal<Value>();

590-

}

591-592-

size_t written = simdutf::binary_to_base64(buf, buflen, dst);

593-

CHECK_EQ(written, dlen);

594-595-

return ExternOneByteString::New(isolate, dst, dlen);

618+

return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {

619+

size_t written = simdutf::binary_to_base64(buf, buflen, dst);

620+

CHECK_EQ(written, dlen);

621+

});

596622

}

597623598624

case BASE64URL: {

599625

buflen = keep_buflen_in_range(buflen);

600626

size_t dlen =

601627

simdutf::base64_length_from_binary(buflen, simdutf::base64_url);

602-

char* dst = node::UncheckedMalloc(dlen);

603-

if (dst == nullptr) {

604-

isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));

605-

return MaybeLocal<Value>();

606-

}

607-608-

size_t written =

609-

simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);

610-

CHECK_EQ(written, dlen);

611-612-

return ExternOneByteString::New(isolate, dst, dlen);

628+

return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {

629+

size_t written =

630+

simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url);

631+

CHECK_EQ(written, dlen);

632+

});

613633

}

614634615635

case HEX: {

616636

buflen = keep_buflen_in_range(buflen);

617637

size_t dlen = buflen * 2;

618-

char* dst = node::UncheckedMalloc(dlen);

619-

if (dst == nullptr) {

620-

isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));

621-

return MaybeLocal<Value>();

622-

}

623-

size_t written = nbytes::HexEncode(buf, buflen, dst, dlen);

624-

CHECK_EQ(written, dlen);

625-626-

return ExternOneByteString::New(isolate, dst, dlen);

638+

return EncodeOneByteString(isolate, dlen, [buf, buflen, dlen](char* dst) {

639+

size_t written = nbytes::HexEncode(buf, buflen, dst, dlen);

640+

CHECK_EQ(written, dlen);

641+

});

627642

}

628643629644

case UCS2: {

630645

buflen = keep_buflen_in_range(buflen);

631646

size_t str_len = buflen / 2;

632647

if constexpr (IsBigEndian()) {

633-

uint16_t* dst = node::UncheckedMalloc<uint16_t>(str_len);

634-

if (str_len != 0 && dst == nullptr) {

635-

isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));

636-

return MaybeLocal<Value>();

637-

}

638-

for (size_t i = 0, k = 0; k < str_len; i += 2, k += 1) {

639-

// The input is in *little endian*, because that's what Node.js

640-

// expects, so the high byte comes after the low byte.

641-

const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);

642-

const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);

643-

dst[k] = static_cast<uint16_t>(hi) << 8 | lo;

644-

}

645-

return ExternTwoByteString::New(isolate, dst, str_len);

648+

return EncodeTwoByteString(

649+

isolate, str_len, [buf, str_len](uint16_t* dst) {

650+

for (size_t i = 0, k = 0; k < str_len; i += 2, k += 1) {

651+

// The input is in *little endian*, because that's what Node.js

652+

// expects, so the high byte comes after the low byte.

653+

const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);

654+

const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);

655+

dst[k] = static_cast<uint16_t>(hi) << 8 | lo;

656+

}

657+

});

646658

}

647659

if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {

648-

// Unaligned data still means we can't directly pass it to V8.

649-

char* dst = node::UncheckedMalloc(buflen);

650-

if (dst == nullptr) {

651-

isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));

652-

return MaybeLocal<Value>();

653-

}

654-

memcpy(dst, buf, buflen);

655-

return ExternTwoByteString::New(

656-

isolate, reinterpret_cast<uint16_t*>(dst), str_len);

660+

// Unaligned data still means we can't directly pass it to V8, so copy it
// into properly aligned two-byte storage first.
//
// Copy exactly str_len code units rather than buflen bytes: buflen may be
// odd, while the destination is only guaranteed to hold
// str_len * sizeof(uint16_t) bytes. Copying buflen bytes would write one
// byte past the end of the buffer; the trailing odd byte is not part of
// the resulting string anyway.
return EncodeTwoByteString(
    isolate, str_len, [buf, str_len](uint16_t* dst) {
      memcpy(dst, buf, str_len * sizeof(uint16_t));
    });

657664

}

658665

return ExternTwoByteString::NewFromCopy(

659666

isolate, reinterpret_cast<const uint16_t*>(buf), str_len);

@@ -675,15 +682,11 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,

675682

// https://nodejs.org/api/buffer.html regarding Node's "ucs2"

676683

// encoding specification

677684

if constexpr (IsBigEndian()) {

678-

uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen);

679-

if (dst == nullptr) {

680-

isolate->ThrowException(node::ERR_MEMORY_ALLOCATION_FAILED(isolate));

681-

return MaybeLocal<Value>();

682-

}

683-

size_t nbytes = buflen * sizeof(uint16_t);

684-

memcpy(dst, buf, nbytes);

685-

CHECK(nbytes::SwapBytes16(reinterpret_cast<char*>(dst), nbytes));

686-

return ExternTwoByteString::New(isolate, dst, buflen);

685+

return EncodeTwoByteString(isolate, buflen, [buf, buflen](uint16_t* dst) {

686+

size_t nbytes = buflen * sizeof(uint16_t);

687+

memcpy(dst, buf, nbytes);

688+

CHECK(nbytes::SwapBytes16(reinterpret_cast<char*>(dst), nbytes));

689+

});

687690

} else {

688691

return ExternTwoByteString::NewFromCopy(isolate, buf, buflen);

689692

}