Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
querystring: improve unescapeBuffer() performance
Before this, v8 would deopt when an out of bounds `inIndex` would get
passed to charCodeAt(). charCodeAt() returns NaN in such cases, so we
directly emulate that behavior as well.

Also, calls to charCodeAt() for constant strings have been replaced
by the raw character codes and parser state is now stored as an
integer instead of a string. Both of these provide a slight
performance increase.
  • Loading branch information
mscdex committed Feb 13, 2016
commit 14acc9593bcdbc5456097fbc6c55f6c7576280c6
58 changes: 27 additions & 31 deletions lib/querystring.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,63 +6,59 @@ const QueryString = exports;
const Buffer = require('buffer').Buffer;


function charCode(c) {
return c.charCodeAt(0);
}


// a safe fast alternative to decodeURIComponent
QueryString.unescapeBuffer = function(s, decodeSpaces) {
var out = new Buffer(s.length);
var state = 'CHAR'; // states: CHAR, HEX0, HEX1
var state = 0;
var n, m, hexchar;

for (var inIndex = 0, outIndex = 0; inIndex <= s.length; inIndex++) {
var c = s.charCodeAt(inIndex);
var c = inIndex < s.length ? s.charCodeAt(inIndex) : NaN;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious, what is the benefit of using NaN here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

charCodeAt() returns NaN for out of bounds indices. I was just keeping the same behavior here but avoiding the deopt.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh that makes sense. Thanks

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would there be drawbacks to changing the <= to < in the loop test? You'd have to replicate some of the out[outIndex++] assignments after the loop but it would keep the loop body simple. I guess you can also accomplish that with a s/NaN/0/.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left it as-is to keep changes minimal. I typically don't like to duplicate code when reusing the loop logic like that is easy/simple enough.

switch (state) {
case 'CHAR':
case 0: // Any character
switch (c) {
case charCode('%'):
case 37: // '%'
n = 0;
m = 0;
state = 'HEX0';
state = 1;
break;
case charCode('+'):
if (decodeSpaces) c = charCode(' ');
case 43: // '+'
if (decodeSpaces)
c = 32; // ' '
// falls through
default:
out[outIndex++] = c;
break;
}
break;

case 'HEX0':
state = 'HEX1';
case 1: // First hex digit
hexchar = c;
if (charCode('0') <= c && c <= charCode('9')) {
n = c - charCode('0');
} else if (charCode('a') <= c && c <= charCode('f')) {
n = c - charCode('a') + 10;
} else if (charCode('A') <= c && c <= charCode('F')) {
n = c - charCode('A') + 10;
if (c >= 48/*0*/ && c <= 57/*9*/) {
n = c - 48/*0*/;
} else if (c >= 65/*A*/ && c <= 70/*F*/) {
n = c - 65/*A*/ + 10;
} else if (c >= 97/*a*/ && c <= 102/*f*/) {
n = c - 97/*a*/ + 10;
} else {
out[outIndex++] = charCode('%');
out[outIndex++] = 37/*%*/;
out[outIndex++] = c;
state = 'CHAR';
state = 0;
break;
}
state = 2;
break;

case 'HEX1':
state = 'CHAR';
if (charCode('0') <= c && c <= charCode('9')) {
m = c - charCode('0');
} else if (charCode('a') <= c && c <= charCode('f')) {
m = c - charCode('a') + 10;
} else if (charCode('A') <= c && c <= charCode('F')) {
m = c - charCode('A') + 10;
case 2: // Second hex digit
state = 0;
if (c >= 48/*0*/ && c <= 57/*9*/) {
m = c - 48/*0*/;
} else if (c >= 65/*A*/ && c <= 70/*F*/) {
m = c - 65/*A*/ + 10;
} else if (c >= 97/*a*/ && c <= 102/*f*/) {
m = c - 97/*a*/ + 10;
} else {
out[outIndex++] = charCode('%');
out[outIndex++] = 37/*%*/;
out[outIndex++] = hexchar;
out[outIndex++] = c;
break;
Expand Down