src: skip duplicate UTF-8 validation in TextDecoder fatal path · nodejs/node@3439582 · GitHub
Skip to content

Commit 3439582

Browse files
mertcanaltin authored and aduh95 committed
src: skip duplicate UTF-8 validation in TextDecoder fatal path
Signed-off-by: Mert Can Altin <mertgold60@gmail.com>
PR-URL: #63231
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Gürgün Dayıoğlu <hey@gurgun.day>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
1 parent 2906fa8 commit 3439582

4 files changed

Lines changed: 64 additions & 8 deletions

File tree

benchmark/util/text-decoder.js

Lines changed: 21 additions & 5 deletions

src/encoding_binding.cc

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -459,14 +459,15 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
459459
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
460460
env->isolate(), "The encoded data was not valid for encoding utf-8");
461461
}
462-
463-
// TODO(chalker): save on utf8 validity recheck in StringBytes::Encode()
464462
}
465463

466464
if (length == 0) return args.GetReturnValue().SetEmptyString();
467465

468466
Local<Value> ret;
469-
if (StringBytes::Encode(env->isolate(), data, length, UTF8).ToLocal(&ret)) {
467+
v8::MaybeLocal<Value> encoded =
468+
has_fatal ? StringBytes::EncodeValidUtf8(env->isolate(), data, length)
469+
: StringBytes::Encode(env->isolate(), data, length, UTF8);
470+
if (encoded.ToLocal(&ret)) {
470471
args.GetReturnValue().Set(ret);
471472
}
472473
}

src/string_bytes.cc

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -671,6 +671,40 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
671671
}
672672
}
673673

674+
MaybeLocal<Value> StringBytes::EncodeValidUtf8(Isolate* isolate,
675+
const char* buf,
676+
size_t buflen) {
677+
CHECK_BUFLEN_IN_RANGE(buflen);
678+
if (!buflen) return String::Empty(isolate);
679+
buflen = keep_buflen_in_range(buflen);
680+
681+
// ASCII fast path
682+
if (!simdutf::validate_ascii_with_errors(buf, buflen).error) {
683+
return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
684+
}
685+
686+
if (buflen >= 32) {
687+
size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
688+
if (u16size > static_cast<size_t>(v8::String::kMaxLength)) {
689+
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
690+
return MaybeLocal<Value>();
691+
}
692+
return EncodeTwoByteString(
693+
isolate, u16size, [buf, buflen, u16size](uint16_t* dst) {
694+
size_t written = simdutf::convert_valid_utf8_to_utf16(
695+
buf, buflen, reinterpret_cast<char16_t*>(dst));
696+
CHECK_EQ(written, u16size);
697+
});
698+
}
699+
700+
Local<String> str;
701+
if (!String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen)
702+
.ToLocal(&str)) {
703+
isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate));
704+
}
705+
return str;
706+
}
707+
674708
MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
675709
const uint16_t* buf,
676710
size_t buflen) {

src/string_bytes.h

Lines changed: 5 additions & 0 deletions

0 commit comments

Comments
 (0)