-
-
Notifications
You must be signed in to change notification settings - Fork 35k
feature: Buffer.lastIndexOf #4846
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
* Remove unnecessary templating from SearchString SearchString used to have separate PatternChar and SubjectChar template type arguments, apparently to support things like searching for an 8-bit string inside a 16-bit string or vice versa. However, SearchString is only used from node_buffer.cc, where PatternChar and SubjectChar are always the same. Since this is extra complexity that's unused and untested (simplifying to a single Char template argument still compiles and didn't break any unit tests), I removed it. * Use Boyer-Hoore[-Horspool] for both indexOf and lastIndexOf Add test cases for lastIndexOf. Test the fallback from BMH to Boyer-Moore, which looks like it was totally untested before. * Extra bounds checks in node_buffer.cc * Extra asserts in string_search.h * Buffer.lastIndexOf: clean up, enforce consistency w/ String.lastIndexOf * Polyfill memrchr(3) for non-GNU systems
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -944,9 +944,44 @@ void Compare(const FunctionCallbackInfo<Value> &args) { | |
| } | ||
|
|
||
|
|
||
| // Computes the offset for starting an indexOf or lastIndexOf search. | ||
| // Returns either a valid offset in [0...<length - 1>], ie inside the Buffer, | ||
| // or -1 to signal that there is no possible match. | ||
| int64_t IndexOfOffset(size_t length, int64_t offset_i64, bool is_forward) { | ||
| int64_t length_i64 = static_cast<int64_t>(length); | ||
| if (length_i64 == 0) { | ||
| // Empty buffer, no match. | ||
| return -1; | ||
| } | ||
| if (offset_i64 < 0) { | ||
| if (offset_i64 + length_i64 >= 0) { | ||
| // Negative offsets count backwards from the end of the buffer. | ||
| return length_i64 + offset_i64; | ||
| } else if (is_forward) { | ||
| // indexOf from before the start of the buffer: search the whole buffer. | ||
| return 0; | ||
| } else { | ||
| // lastIndexOf from before the start of the buffer: no match. | ||
| return -1; | ||
| } | ||
| } else { | ||
| if (offset_i64 < length_i64) { | ||
| // Valid positive offset. | ||
| return offset_i64; | ||
| } else if (is_forward) { | ||
| // indexOf from past the end of the buffer: no match. | ||
| return -1; | ||
| } else { | ||
| // lastIndexOf from past the end of the buffer: search the whole buffer. | ||
| return length_i64 - 1; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| void IndexOfString(const FunctionCallbackInfo<Value>& args) { | ||
| ASSERT(args[1]->IsString()); | ||
| ASSERT(args[2]->IsNumber()); | ||
| ASSERT(args[4]->IsBoolean()); | ||
|
|
||
| enum encoding enc = ParseEncoding(args.GetIsolate(), | ||
| args[3], | ||
|
|
@@ -956,31 +991,26 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { | |
| SPREAD_ARG(args[0], ts_obj); | ||
|
|
||
| Local<String> needle = args[1].As<String>(); | ||
| int64_t offset_i64 = args[2]->IntegerValue(); | ||
| bool is_forward = args[4]->IsTrue(); | ||
|
|
||
| const char* haystack = ts_obj_data; | ||
| const size_t haystack_length = ts_obj_length; | ||
| // Extended latin-1 characters are 2 bytes in Utf8. | ||
| const size_t needle_length = | ||
| enc == BINARY ? needle->Length() : needle->Utf8Length(); | ||
|
|
||
|
|
||
| if (needle_length == 0 || haystack_length == 0) { | ||
| return args.GetReturnValue().Set(-1); | ||
| } | ||
|
|
||
| int64_t offset_i64 = args[2]->IntegerValue(); | ||
| size_t offset = 0; | ||
|
|
||
| if (offset_i64 < 0) { | ||
| if (offset_i64 + static_cast<int64_t>(haystack_length) < 0) { | ||
| offset = 0; | ||
| } else { | ||
| offset = static_cast<size_t>(haystack_length + offset_i64); | ||
| } | ||
| } else { | ||
| offset = static_cast<size_t>(offset_i64); | ||
| int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, is_forward); | ||
| if (opt_offset <= -1) { | ||
| return args.GetReturnValue().Set(-1); | ||
| } | ||
|
|
||
| if (haystack_length < offset || needle_length + offset > haystack_length) { | ||
| size_t offset = static_cast<size_t>(opt_offset); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a paranoid person. Mind adding a There's one other place where this is applicable. Will make a note when I get to it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, done
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @trevnorris added |
||
| CHECK_LT(offset, haystack_length); | ||
| if (is_forward && needle_length + offset > haystack_length) { | ||
| return args.GetReturnValue().Set(-1); | ||
| } | ||
|
|
||
|
|
@@ -1008,13 +1038,15 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { | |
| haystack_length / 2, | ||
| decoded_string, | ||
| decoder.size() / 2, | ||
| offset / 2); | ||
| offset / 2, | ||
| is_forward); | ||
| } else { | ||
| result = SearchString(reinterpret_cast<const uint16_t*>(haystack), | ||
| haystack_length / 2, | ||
| reinterpret_cast<const uint16_t*>(*needle_value), | ||
| needle_value.length(), | ||
| offset / 2); | ||
| offset / 2, | ||
| is_forward); | ||
| } | ||
| result *= 2; | ||
| } else if (enc == UTF8) { | ||
|
|
@@ -1026,7 +1058,8 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { | |
| haystack_length, | ||
| reinterpret_cast<const uint8_t*>(*needle_value), | ||
| needle_length, | ||
| offset); | ||
| offset, | ||
| is_forward); | ||
| } else if (enc == BINARY) { | ||
| uint8_t* needle_data = static_cast<uint8_t*>(malloc(needle_length)); | ||
| if (needle_data == nullptr) { | ||
|
|
@@ -1039,7 +1072,8 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { | |
| haystack_length, | ||
| needle_data, | ||
| needle_length, | ||
| offset); | ||
| offset, | ||
| is_forward); | ||
| free(needle_data); | ||
| } | ||
|
|
||
|
|
@@ -1050,17 +1084,18 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) { | |
| void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) { | ||
| ASSERT(args[1]->IsObject()); | ||
| ASSERT(args[2]->IsNumber()); | ||
| ASSERT(args[4]->IsBoolean()); | ||
|
|
||
| enum encoding enc = ParseEncoding(args.GetIsolate(), | ||
| args[3], | ||
| UTF8); | ||
|
|
||
| THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to place And even though they're messing around in a way they shouldn't, it still shouldn't be possible to cause an abort using the JS API.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure, i can add that, but how did this work before?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @trevnorris added
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup. It crashed before. Thanks for bringing it to my attention. :) |
||
| THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]); | ||
| SPREAD_ARG(args[0], ts_obj); | ||
| SPREAD_ARG(args[1], buf); | ||
|
|
||
| if (buf_length > 0) | ||
| CHECK_NE(buf_data, nullptr); | ||
| int64_t offset_i64 = args[2]->IntegerValue(); | ||
| bool is_forward = args[4]->IsTrue(); | ||
|
|
||
| const char* haystack = ts_obj_data; | ||
| const size_t haystack_length = ts_obj_length; | ||
|
|
@@ -1071,19 +1106,13 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) { | |
| return args.GetReturnValue().Set(-1); | ||
| } | ||
|
|
||
| int64_t offset_i64 = args[2]->IntegerValue(); | ||
| size_t offset = 0; | ||
|
|
||
| if (offset_i64 < 0) { | ||
| if (offset_i64 + static_cast<int64_t>(haystack_length) < 0) | ||
| offset = 0; | ||
| else | ||
| offset = static_cast<size_t>(haystack_length + offset_i64); | ||
| } else { | ||
| offset = static_cast<size_t>(offset_i64); | ||
| int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, is_forward); | ||
| if (opt_offset <= -1) { | ||
| return args.GetReturnValue().Set(-1); | ||
| } | ||
|
|
||
| if (haystack_length < offset || needle_length + offset > haystack_length) { | ||
| size_t offset = static_cast<size_t>(opt_offset); | ||
| CHECK_LT(offset, haystack_length); | ||
| if (is_forward && needle_length + offset > haystack_length) { | ||
| return args.GetReturnValue().Set(-1); | ||
| } | ||
|
|
||
|
|
@@ -1098,15 +1127,17 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) { | |
| haystack_length / 2, | ||
| reinterpret_cast<const uint16_t*>(needle), | ||
| needle_length / 2, | ||
| offset / 2); | ||
| offset / 2, | ||
| is_forward); | ||
| result *= 2; | ||
| } else { | ||
| result = SearchString( | ||
| reinterpret_cast<const uint8_t*>(haystack), | ||
| haystack_length, | ||
| reinterpret_cast<const uint8_t*>(needle), | ||
| needle_length, | ||
| offset); | ||
| offset, | ||
| is_forward); | ||
| } | ||
|
|
||
| args.GetReturnValue().Set( | ||
|
|
@@ -1116,28 +1147,29 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) { | |
| void IndexOfNumber(const FunctionCallbackInfo<Value>& args) { | ||
| ASSERT(args[1]->IsNumber()); | ||
| ASSERT(args[2]->IsNumber()); | ||
| ASSERT(args[3]->IsBoolean()); | ||
|
|
||
| THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]); | ||
| SPREAD_ARG(args[0], ts_obj); | ||
|
|
||
| uint32_t needle = args[1]->Uint32Value(); | ||
| int64_t offset_i64 = args[2]->IntegerValue(); | ||
| size_t offset; | ||
|
|
||
| if (offset_i64 < 0) { | ||
| if (offset_i64 + static_cast<int64_t>(ts_obj_length) < 0) | ||
| offset = 0; | ||
| else | ||
| offset = static_cast<size_t>(ts_obj_length + offset_i64); | ||
| } else { | ||
| offset = static_cast<size_t>(offset_i64); | ||
| } | ||
| bool is_forward = args[3]->IsTrue(); | ||
|
|
||
| if (ts_obj_length == 0 || offset + 1 > ts_obj_length) | ||
| int64_t opt_offset = IndexOfOffset(ts_obj_length, offset_i64, is_forward); | ||
| if (opt_offset <= -1) { | ||
| return args.GetReturnValue().Set(-1); | ||
| } | ||
| size_t offset = static_cast<size_t>(opt_offset); | ||
| CHECK_LT(offset, ts_obj_length); | ||
|
|
||
| void* ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset); | ||
| char* ptr_char = static_cast<char*>(ptr); | ||
| const void* ptr; | ||
| if (is_forward) { | ||
| ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset); | ||
| } else { | ||
| ptr = node::stringsearch::MemrchrFill(ts_obj_data, needle, offset + 1); | ||
| } | ||
| const char* ptr_char = static_cast<const char*>(ptr); | ||
| args.GetReturnValue().Set(ptr ? static_cast<int>(ptr_char - ts_obj_data) | ||
| : -1); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
side note: more sure way here would be to pass all remaining values to C++, check if
node::Buffer::HasInstance()orargs[1]->IsNumber()then branch. Otherwiseenv()->ThrowTypeError(). Would get around potential proto munging that causesinstanceofto incorrectly return true.Don't bother making the change. Just making the note.