util: add fast path for Latin1 decoding · nodejs/node@374eb41 · GitHub
Skip to content

Commit 374eb41

Browse files
mertcanaltin authored and ruyadorno committed
util: add fast path for Latin1 decoding
PR-URL: #55275 Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Daniel Lemire <daniel@lemire.me>
1 parent 34c6882 commit 374eb41

5 files changed

Lines changed: 212 additions & 2 deletions

File tree

benchmark/util/text-decoder.js

Lines changed: 1 addition & 1 deletion

lib/internal/encoding.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder');
2929
const kEncoder = Symbol('encoder');
3030
const kFatal = Symbol('kFatal');
3131
const kUTF8FastPath = Symbol('kUTF8FastPath');
32+
const kLatin1FastPath = Symbol('kLatin1FastPath');
3233
const kIgnoreBOM = Symbol('kIgnoreBOM');
3334

3435
const {
@@ -55,6 +56,7 @@ const {
5556
encodeIntoResults,
5657
encodeUtf8String,
5758
decodeUTF8,
59+
decodeLatin1,
5860
} = binding;
5961

6062
const { Buffer } = require('buffer');
@@ -419,9 +421,10 @@ function makeTextDecoderICU() {
419421
this[kFatal] = Boolean(options?.fatal);
420422
// Only support fast paths for UTF-8 and Latin1 (windows-1252).
421423
this[kUTF8FastPath] = enc === 'utf-8';
424+
this[kLatin1FastPath] = enc === 'windows-1252';
422425
this[kHandle] = undefined;
423426

424-
if (!this[kUTF8FastPath]) {
427+
if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
425428
this.#prepareConverter();
426429
}
427430
}
@@ -438,11 +441,16 @@ function makeTextDecoderICU() {
438441
validateDecoder(this);
439442

440443
this[kUTF8FastPath] &&= !(options?.stream);
444+
this[kLatin1FastPath] &&= !(options?.stream);
441445

442446
if (this[kUTF8FastPath]) {
443447
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
444448
}
445449

450+
if (this[kLatin1FastPath]) {
451+
return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
452+
}
453+
446454
this.#prepareConverter();
447455

448456
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

src/encoding_binding.cc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
33
#include "env-inl.h"
4+
#include "node_buffer.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
67
#include "simdutf.h"
@@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
226227
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
227228
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
228229
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
230+
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
229231
}
230232

231233
void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -243,6 +245,50 @@ void BindingData::RegisterTimerExternalReferences(
243245
registry->Register(DecodeUTF8);
244246
registry->Register(ToASCII);
245247
registry->Register(ToUnicode);
248+
registry->Register(DecodeLatin1);
249+
}
250+
251+
void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
252+
Environment* env = Environment::GetCurrent(args);
253+
254+
CHECK_GE(args.Length(), 1);
255+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
256+
args[0]->IsArrayBufferView())) {
257+
return node::THROW_ERR_INVALID_ARG_TYPE(
258+
env->isolate(),
259+
"The \"input\" argument must be an instance of ArrayBuffer, "
260+
"SharedArrayBuffer, or ArrayBufferView.");
261+
}
262+
263+
bool ignore_bom = args[1]->IsTrue();
264+
bool has_fatal = args[2]->IsTrue();
265+
266+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
267+
const uint8_t* data = buffer.data();
268+
size_t length = buffer.length();
269+
270+
if (ignore_bom && length > 0 && data[0] == 0xFF) {
271+
data++;
272+
length--;
273+
}
274+
275+
if (length == 0) {
276+
return args.GetReturnValue().SetEmptyString();
277+
}
278+
279+
std::string result(length * 2, '\0');
280+
281+
size_t written = simdutf::convert_latin1_to_utf8(
282+
reinterpret_cast<const char*>(data), length, result.data());
283+
284+
if (has_fatal && written == 0) {
285+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
286+
env->isolate(), "The encoded data was not valid for encoding latin1");
287+
}
288+
289+
Local<Object> buffer_result =
290+
node::Buffer::Copy(env, result.c_str(), written).ToLocalChecked();
291+
args.GetReturnValue().Set(buffer_result);
246292
}
247293

248294
} // namespace encoding_binding

src/encoding_binding.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);
Lines changed: 155 additions & 0 deletions

0 commit comments

Comments
 (0)