encoding: make TextDecoder handle BOM correctly · nodejs/node@c80a4d8 · GitHub
Skip to content

Commit c80a4d8

Browse files
addaleaxMylesBorins
authored andcommitted
encoding: make TextDecoder handle BOM correctly
Do not accept the BOM if it comes from a different encoding, and only discard the BOM after it has actually been read (including when it is spread over multiple chunks in streaming mode). Fixes: #25315 PR-URL: #30132 Reviewed-By: Gus Caplan <me@gus.host>
1 parent 2994976 commit c80a4d8

5 files changed

Lines changed: 49 additions & 34 deletions

File tree

lib/internal/encoding.js

Lines changed: 12 additions & 15 deletions

src/node_buffer.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,10 +219,10 @@ size_t Length(Local<Object> obj) {
219219
}
220220

221221

222-
inline MaybeLocal<Uint8Array> New(Environment* env,
223-
Local<ArrayBuffer> ab,
224-
size_t byte_offset,
225-
size_t length) {
222+
MaybeLocal<Uint8Array> New(Environment* env,
223+
Local<ArrayBuffer> ab,
224+
size_t byte_offset,
225+
size_t length) {
226226
CHECK(!env->buffer_prototype_object().IsEmpty());
227227
Local<Uint8Array> ui = Uint8Array::New(ab, byte_offset, length);
228228
Maybe<bool> mb =

src/node_i18n.cc

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ using v8::NewStringType;
9595
using v8::Object;
9696
using v8::ObjectTemplate;
9797
using v8::String;
98+
using v8::Uint8Array;
9899
using v8::Value;
99100

100101
namespace i18n {
@@ -227,25 +228,41 @@ class ConverterObject : public BaseObject, Converter {
227228
const char* source = input.data();
228229
size_t source_length = input.length();
229230

230-
if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) {
231-
int32_t bomOffset = 0;
232-
ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status);
233-
source += bomOffset;
234-
source_length -= bomOffset;
235-
converter->bomSeen_ = true;
236-
}
237-
238231
UChar* target = *result;
239232
ucnv_toUnicode(converter->conv,
240233
&target, target + (limit * sizeof(UChar)),
241234
&source, source + source_length,
242235
nullptr, flush, &status);
243236

244237
if (U_SUCCESS(status)) {
245-
if (limit > 0)
238+
bool omit_initial_bom = false;
239+
if (limit > 0) {
246240
result.SetLength(target - &result[0]);
241+
if (result.length() > 0 &&
242+
converter->unicode_ &&
243+
!converter->ignoreBOM_ &&
244+
!converter->bomSeen_) {
245+
// If the very first result in the stream is a BOM, and we are not
246+
// explicitly told to ignore it, then we mark it for discarding.
247+
if (result[0] == 0xFEFF) {
248+
omit_initial_bom = true;
249+
}
250+
converter->bomSeen_ = true;
251+
}
252+
}
247253
ret = ToBufferEndian(env, &result);
248-
args.GetReturnValue().Set(ret.ToLocalChecked());
254+
if (omit_initial_bom && !ret.IsEmpty()) {
255+
// Peform `ret = ret.slice(2)`.
256+
CHECK(ret.ToLocalChecked()->IsUint8Array());
257+
Local<Uint8Array> orig_ret = ret.ToLocalChecked().As<Uint8Array>();
258+
ret = Buffer::New(env,
259+
orig_ret->Buffer(),
260+
orig_ret->ByteOffset() + 2,
261+
orig_ret->ByteLength() - 2)
262+
.FromMaybe(Local<Uint8Array>());
263+
}
264+
if (!ret.IsEmpty())
265+
args.GetReturnValue().Set(ret.ToLocalChecked());
249266
return;
250267
}
251268

src/node_internals.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,11 @@ v8::MaybeLocal<v8::Object> New(Environment* env,
161161
char* data,
162162
size_t length,
163163
bool uses_malloc);
164-
164+
// Creates a Buffer instance over an existing Uint8Array.
165+
v8::MaybeLocal<v8::Uint8Array> New(Environment* env,
166+
v8::Local<v8::ArrayBuffer> ab,
167+
size_t byte_offset,
168+
size_t length);
165169
// Construct a Buffer from a MaybeStackBuffer (and also its subclasses like
166170
// Utf8Value and TwoByteValue).
167171
// If |buf| is invalidated, an empty MaybeLocal is returned, and nothing is

test/wpt/status/encoding.json

Lines changed: 1 addition & 4 deletions

0 commit comments

Comments
 (0)