url: refactor `pathToFileURL` to native · nodejs/node@6317f77 · GitHub
Skip to content

Commit 6317f77

Browse files
aduh95ruyadorno
authored and committed
url: refactor pathToFileURL to native
PR-URL: #55476 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
1 parent 7acb963 commit 6317f77

3 files changed

Lines changed: 120 additions & 77 deletions

File tree

lib/internal/url.js

Lines changed: 15 additions & 77 deletions

src/node_url.cc

Lines changed: 104 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,108 @@ void BindingData::Deserialize(v8::Local<v8::Context> context,
7575
CHECK_NOT_NULL(binding);
7676
}
7777

78+
#ifndef LARGEST_ASCII_CHAR_CODE_TO_ENCODE
79+
#define LARGEST_ASCII_CHAR_CODE_TO_ENCODE '~'
80+
#endif
81+
82+
// RFC1738 defines the following chars as "unsafe" for URLs
83+
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
84+
constexpr auto lookup_table = []() consteval {
85+
// Each entry is an array that can hold up to 3 chars + null terminator
86+
std::array<std::array<char, 4>, LARGEST_ASCII_CHAR_CODE_TO_ENCODE + 1>
87+
result{};
88+
89+
for (uint8_t i = 0; i <= LARGEST_ASCII_CHAR_CODE_TO_ENCODE; i++) {
90+
switch (i) {
91+
#define ENCODE_CHAR(CHAR, HEX_DIGIT_2, HEX_DIGIT_1) \
92+
case CHAR: \
93+
result[i] = {{'%', HEX_DIGIT_2, HEX_DIGIT_1, 0}}; \
94+
break;
95+
96+
ENCODE_CHAR('\0', '0', '0') // '\0' == 0x00
97+
ENCODE_CHAR('\t', '0', '9') // '\t' == 0x09
98+
ENCODE_CHAR('\n', '0', 'A') // '\n' == 0x0A
99+
ENCODE_CHAR('\r', '0', 'D') // '\r' == 0x0D
100+
ENCODE_CHAR(' ', '2', '0') // ' ' == 0x20
101+
ENCODE_CHAR('"', '2', '2') // '"' == 0x22
102+
ENCODE_CHAR('#', '2', '3') // '#' == 0x23
103+
ENCODE_CHAR('%', '2', '5') // '%' == 0x25
104+
ENCODE_CHAR('?', '3', 'F') // '?' == 0x3F
105+
ENCODE_CHAR('[', '5', 'B') // '[' == 0x5B
106+
ENCODE_CHAR('\\', '5', 'C') // '\\' == 0x5C
107+
ENCODE_CHAR(']', '5', 'D') // ']' == 0x5D
108+
ENCODE_CHAR('^', '5', 'E') // '^' == 0x5E
109+
ENCODE_CHAR('|', '7', 'C') // '|' == 0x7C
110+
ENCODE_CHAR('~', '7', 'E') // '~' == 0x7E
111+
#undef ENCODE_CHAR
112+
113+
default:
114+
result[i] = {{static_cast<char>(i), '\0', '\0', '\0'}};
115+
break;
116+
}
117+
}
118+
119+
return result;
120+
}
121+
();
122+
123+
// Path flavour used to select platform-specific handling when a file path is
// converted to a URL.
enum class OS {
  WINDOWS,  // '\\' is rewritten to '/' by EncodePathChars
  POSIX,
};
124+
125+
std::string EncodePathChars(std::string_view input_str, OS operating_system) {
126+
std::string encoded = "file://";
127+
encoded.reserve(input_str.size() +
128+
7); // Reserve space for "file://" and input_str
129+
for (size_t i : input_str) {
130+
if (i > LARGEST_ASCII_CHAR_CODE_TO_ENCODE) [[unlikely]] {
131+
encoded.push_back(i);
132+
continue;
133+
}
134+
if (operating_system == OS::WINDOWS) {
135+
if (i == '\\') {
136+
encoded.push_back('/');
137+
continue;
138+
}
139+
}
140+
encoded.append(lookup_table[i].data());
141+
}
142+
143+
return encoded;
144+
}
145+
146+
void BindingData::PathToFileURL(const FunctionCallbackInfo<Value>& args) {
147+
CHECK_GE(args.Length(), 2); // input
148+
CHECK(args[0]->IsString());
149+
CHECK(args[1]->IsBoolean());
150+
151+
Realm* realm = Realm::GetCurrent(args);
152+
BindingData* binding_data = realm->GetBindingData<BindingData>();
153+
Isolate* isolate = realm->isolate();
154+
OS os = args[1]->IsTrue() ? OS::WINDOWS : OS::POSIX;
155+
156+
Utf8Value input(isolate, args[0]);
157+
auto input_str = input.ToStringView();
158+
CHECK(!input_str.empty());
159+
160+
auto out =
161+
ada::parse<ada::url_aggregator>(EncodePathChars(input_str, os), nullptr);
162+
163+
if (!out) {
164+
return ThrowInvalidURL(realm->env(), input.ToStringView(), nullptr);
165+
}
166+
167+
if (os == OS::WINDOWS && args.Length() > 2 && !args[2]->IsUndefined())
168+
[[unlikely]] {
169+
CHECK(args[2]->IsString());
170+
Utf8Value hostname(isolate, args[2]);
171+
CHECK(out->set_hostname(hostname.ToStringView()));
172+
}
173+
174+
binding_data->UpdateComponents(out->get_components(), out->type);
175+
176+
args.GetReturnValue().Set(
177+
ToV8Value(realm->context(), out->get_href(), isolate).ToLocalChecked());
178+
}
179+
78180
void BindingData::DomainToASCII(const FunctionCallbackInfo<Value>& args) {
79181
Environment* env = Environment::GetCurrent(args);
80182
CHECK_GE(args.Length(), 1); // input
@@ -371,6 +473,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
371473
SetMethodNoSideEffect(isolate, target, "format", Format);
372474
SetMethodNoSideEffect(isolate, target, "getOrigin", GetOrigin);
373475
SetMethod(isolate, target, "parse", Parse);
476+
SetMethod(isolate, target, "pathToFileURL", PathToFileURL);
374477
SetMethod(isolate, target, "update", Update);
375478
SetFastMethodNoSideEffect(
376479
isolate, target, "canParse", CanParse, {fast_can_parse_methods_, 2});
@@ -391,6 +494,7 @@ void BindingData::RegisterExternalReferences(
391494
registry->Register(Format);
392495
registry->Register(GetOrigin);
393496
registry->Register(Parse);
497+
registry->Register(PathToFileURL);
394498
registry->Register(Update);
395499
registry->Register(CanParse);
396500
registry->Register(FastCanParse);

src/node_url.h

Lines changed: 1 addition & 0 deletions

0 commit comments

Comments
 (0)