Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Next Next commit
test: pull encoding WPT test fixtures
  • Loading branch information
joyeecheung committed Jan 2, 2019
commit 57abb93fa6bf79e3376a8430a10f4e37aa14a671
5 changes: 3 additions & 2 deletions test/fixtures/wpt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ See [test/wpt](../../wpt/README.md) for information on how these tests are run.

Last update:

- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
- interfaces: https://github.com/web-platform-tests/wpt/tree/db7f86289e/interfaces
- console: https://github.com/web-platform-tests/wpt/tree/9786a4b131/console
- encoding: https://github.com/web-platform-tests/wpt/tree/a093a659ed/encoding
- url: https://github.com/web-platform-tests/wpt/tree/75b0f336c5/url
- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
- interfaces: https://github.com/web-platform-tests/wpt/tree/712c9f275e/interfaces

[Web Platform Tests]: https://github.com/web-platform-tests/wpt
[`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/master/docs/git-node.md#git-node-wpt
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/META.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
spec: https://encoding.spec.whatwg.org/
suggested_reviewers:
- inexorabletash
- annevk
52 changes: 52 additions & 0 deletions test/fixtures/wpt/encoding/api-basics.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// META: title=Encoding API: Basics

// Both constructors, when called without a label, must report 'utf-8'.
test(function() {
  var encoder = new TextEncoder;
  assert_equals(encoder.encoding, 'utf-8', 'default encoding is utf-8');

  var decoder = new TextDecoder;
  assert_equals(decoder.encoding, 'utf-8', 'default encoding is utf-8');
}, 'Default encodings');

// encode() with no argument, or with an explicit undefined, must yield no bytes.
test(function() {
  var fromNoArgument = new TextEncoder().encode();
  assert_array_equals(fromNoArgument, [], 'input default should be empty string');

  var fromUndefined = new TextEncoder().encode(undefined);
  assert_array_equals(fromUndefined, [], 'input default should be empty string');
}, 'Default inputs');


/**
 * Registers a test that decodes `bytes` with a TextDecoder for `encoding`
 * and expects `string`, once from a Uint8Array view and once from the
 * underlying ArrayBuffer.
 *
 * @param {string} encoding - label passed to the TextDecoder constructor.
 * @param {string} string - expected decoded text.
 * @param {number[]} bytes - encoded byte values.
 */
function testDecodeSample(encoding, string, bytes) {
  var description = 'Decode sample: ' + encoding;

  test(function() {
    var asView = new Uint8Array(bytes);
    assert_equals(new TextDecoder(encoding).decode(asView), string);

    var asBuffer = new Uint8Array(bytes).buffer;
    assert_equals(new TextDecoder(encoding).decode(asBuffer), string);
  }, description);
}

// Sample text, in order:
//   z                 (ASCII U+007A)
//   cent              (Latin-1 U+00A2)
//   CJK water         (BMP U+6C34)
//   G-Clef            (non-BMP U+1D11E)
//   PUA               (BMP U+F8FF)
//   PUA               (non-BMP U+10FFFD)
//   byte-swapped BOM  (non-character U+FFFE)
var sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE';

// UTF-8 is the only encoding checked in both directions.
test(function() {
  var expectedBytes = [
    0x7A,
    0xC2, 0xA2,
    0xE6, 0xB0, 0xB4,
    0xF0, 0x9D, 0x84, 0x9E,
    0xEF, 0xA3, 0xBF,
    0xF4, 0x8F, 0xBF, 0xBD,
    0xEF, 0xBF, 0xBE
  ];

  var actualBytes = new TextEncoder().encode(sample);
  assert_array_equals(Array.prototype.slice.call(actualBytes), expectedBytes);

  var fromView = new TextDecoder('utf-8').decode(new Uint8Array(expectedBytes));
  assert_equals(fromView, sample);

  var fromBuffer = new TextDecoder('utf-8').decode(new Uint8Array(expectedBytes).buffer);
  assert_equals(fromBuffer, sample);
}, 'Encode/decode round trip: utf-8');

// Decode-only checks. The 'utf-16' label is given the same little-endian
// bytes as 'utf-16le'.
[
  ['utf-16le',
   [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]],
  ['utf-16be',
   [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE]],
  ['utf-16',
   [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]]
].forEach(function(entry) {
  testDecodeSample(entry[0], sample, entry[1]);
});
24 changes: 24 additions & 0 deletions test/fixtures/wpt/encoding/api-invalid-label.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// META: title=Encoding API: invalid label
// META: timeout=long
// META: script=resources/encodings.js

// Labels that TextDecoder is expected to reject. Starts with one made-up
// label; setup() then adds every known label padded with a character from
// the list below.
var tests = ["invalid-invalidLabel"];

setup(function() {
  // Characters placed before, after, and on both sides of each label.
  var padding = ["\u0000", "\u000b", "\u00a0", "\u2028", "\u2029"];

  encodings_table.forEach(function(section) {
    section.encodings.forEach(function(encoding) {
      encoding.labels.forEach(function(label) {
        padding.forEach(function(ws) {
          tests.push(ws + label, label + ws, ws + label + ws);
        });
      });
    });
  });
});

// One test per candidate label: constructing a decoder must throw RangeError.
tests.forEach(function(input) {
  var description =
      'Invalid label ' + format_value(input) + ' should be rejected by TextDecoder.';

  test(function() {
    assert_throws(new RangeError(), function() { new TextDecoder(input); });
  }, description);
});
15 changes: 15 additions & 0 deletions test/fixtures/wpt/encoding/api-replacement-encodings.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// META: title=Encoding API: replacement encoding
// META: script=resources/encodings.js

// Every label of the encoding named 'replacement' must make the
// TextDecoder constructor throw a RangeError.
encodings_table.forEach(function(section) {
  section.encodings.forEach(function(encoding) {
    if (encoding.name !== 'replacement') {
      return;
    }

    encoding.labels.forEach(function(label) {
      var description = 'Label for "replacement" should be rejected by API: ' + label;

      test(function() {
        assert_throws(new RangeError(), function() { new TextDecoder(label); });
      }, description);
    });
  });
});

48 changes: 48 additions & 0 deletions test/fixtures/wpt/encoding/api-surrogates-utf8.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// META: title=Encoding API: Invalid UTF-16 surrogates with UTF-8 encoding

// Strings containing unpaired or mis-ordered UTF-16 surrogates. Each entry
// carries the UTF-8 bytes expected from the encoder (`expected`) and the
// text expected when those bytes are decoded again (`decoded`).
// NOTE(review): U+D800 is labelled "low" and U+DC00 "high" below, which is
// the reverse of the usual Unicode terminology. The names are reproduced
// verbatim because they are used in the reported test names.
var badStrings = (function() {
  // UTF-8 bytes of U+FFFD, expected in place of each lone surrogate.
  var REPLACEMENT = [0xef, 0xbf, 0xbd];
  var ABC = [0x61, 0x62, 0x63];
  var DIGITS = [0x31, 0x32, 0x33];

  function entry(name, input, expected, decoded) {
    return {
      input: input,
      expected: expected,
      decoded: decoded,
      name: name
    };
  }

  return [
    entry('Sanity check',
          'abc123', ABC.concat(DIGITS), 'abc123'),
    entry('Surrogate half (low)',
          '\uD800', REPLACEMENT.slice(), '\uFFFD'),
    entry('Surrogate half (high)',
          '\uDC00', REPLACEMENT.slice(), '\uFFFD'),
    entry('Surrogate half (low), in a string',
          'abc\uD800123', ABC.concat(REPLACEMENT, DIGITS), 'abc\uFFFD123'),
    entry('Surrogate half (high), in a string',
          'abc\uDC00123', ABC.concat(REPLACEMENT, DIGITS), 'abc\uFFFD123'),
    entry('Wrong order',
          '\uDC00\uD800', REPLACEMENT.concat(REPLACEMENT), '\uFFFD\uFFFD')
  ];
})();

// For each entry: encode the input, compare the bytes, then decode those
// same bytes as UTF-8 and compare the resulting text.
badStrings.forEach(function(entry) {
  var description = 'Invalid surrogates encoded into UTF-8: ' + entry.name;

  test(function() {
    var bytes = new TextEncoder().encode(entry.input);
    assert_array_equals(Array.prototype.slice.call(bytes), entry.expected);

    var roundTripped = new TextDecoder('utf-8').decode(bytes);
    assert_equals(roundTripped, entry.decoded);
  }, description);
});
33 changes: 33 additions & 0 deletions test/fixtures/wpt/encoding/big5-encoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<!doctype html>
<meta charset=big5> <!-- test breaks if the server overrides this -->
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
// Registers a test that puts `input` (wrapped in "X" on both sides) into the
// query of an <a> element's URL and expects the serialized query to equal
// `output` wrapped the same way. `desc` is appended to the test name.
function encode(input, output, desc) {
  var title = "big5 encoder: " + desc;

  test(function() {
    // <a> uses document encoding for URL's query
    var anchor = document.createElement("a");
    // The X prefix and suffix are there to expose off-by-one errors.
    anchor.href = "https://example.com/?X" + input + "X";
    var query = anchor.search.slice(1); // drop the leading "?"
    assert_equals(query, "X" + output + "X");
  }, title);
}

encode("ab", "ab", "very basic")
// edge cases
encode("\u9EA6", "%26%2340614%3B", "Highest-pointer BMP character excluded from encoder");
encode("\uD858\uDE6B", "%26%23156267%3B", "Highest-pointer character excluded from encoder");
encode("\u3000", "%A1@", "Lowest-pointer character included in encoder");
encode("\u20AC", "%A3%E1", "Euro; the highest-pointer character before a range of 30 unmapped pointers");
encode("\u4E00", "%A4@", "The lowest-pointer character after the range of 30 unmapped pointers");
encode("\uD85D\uDE07", "%C8%A4", "The highest-pointer character before a range of 41 unmapped pointers");
encode("\uFFE2", "%C8%CD", "The lowest-pointer character after the range of 41 unmapped pointers");
encode("\u79D4", "%FE%FE", "The last character in the index");
// not in index
encode("\u2603", "%26%239731%3B", "The canonical BMP test character that is not in the index");
encode("\uD83D\uDCA9", "%26%23128169%3B", "The canonical astral test character that is not in the index");
// duplicate low bits
encode("\uD840\uDFB5", "%FDj", "A Plane 2 character whose low 16 bits match a BMP character that has a lower pointer");
// prefer last
encode("\u2550", "%F9%F9", "A duplicate-mapped code point that prefers the highest pointer in the encoder");
</script>
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/eof-shift_jis-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!doctype html>
<meta charset=shift_jis>
<title>Shift_JIS file ending with a truncated sequence</title>
One-byte truncated sequence:&#xFFFD;
5 changes: 5 additions & 0 deletions test/fixtures/wpt/encoding/eof-shift_jis.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!doctype html>
<meta charset=shift_jis>
<title>Shift_JIS file ending with a truncated sequence</title>
<link rel=match href=/encoding/eof-shift_jis-ref.html>
One-byte truncated sequence:�
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-one-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a one-byte truncated sequence</title>
One-byte truncated sequence:&#xFFFD;
5 changes: 5 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-one.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a one-byte truncated sequence</title>
<link rel=match href="eof-utf-8-one-ref.html">
One-byte truncated sequence:�
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-three-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a three-byte truncated sequence</title>
Three-byte truncated sequence:&#xFFFD;
5 changes: 5 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-three.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a three-byte truncated sequence</title>
<link rel=match href="eof-utf-8-three-ref.html">
Three-byte truncated sequence:�
4 changes: 4 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-two-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a two-byte truncated sequence</title>
Two-byte truncated sequence:&#xFFFD;
5 changes: 5 additions & 0 deletions test/fixtures/wpt/encoding/eof-utf-8-two.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!doctype html>
<meta charset=utf-8>
<title>UTF-8 file ending with a two-byte truncated sequence</title>
<link rel=match href="eof-utf-8-two-ref.html">
Two-byte truncated sequence:�
21 changes: 21 additions & 0 deletions test/fixtures/wpt/encoding/gb18030-encoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!doctype html>
<meta charset=gb18030> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
// Registers a test that puts `input` into the query of an <a> element's URL
// and expects the serialized query to equal `output`. `desc` is appended to
// the test name.
function encode(input, output, desc) {
  var title = "gb18030 encoder: " + desc;

  test(function() {
    // <a> uses document encoding for URL's query
    var anchor = document.createElement("a");
    anchor.href = "https://example.com/?" + input;
    var query = anchor.search.slice(1); // drop the leading "?"
    assert_equals(query, output);
  }, title);
}

encode("s", "s", "very basic")
encode("\u20AC", "%A2%E3", "Euro")
encode("\u4E02", "%81@", "character")
encode("\uE4C6", "%A1@", "PUA")
encode("\uE4C5", "%FE%FE", "PUA #2")
encode("\ud83d\udca9", "%949%DA3", "poo")
</script>
21 changes: 21 additions & 0 deletions test/fixtures/wpt/encoding/gbk-encoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!doctype html>
<meta charset=gbk> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
// Registers a test that puts `input` into the query of an <a> element's URL
// and expects the serialized query to equal `output`. `desc` is appended to
// the test name.
function encode(input, output, desc) {
  var title = "gbk encoder: " + desc;

  test(function() {
    // <a> uses document encoding for URL's query
    var anchor = document.createElement("a");
    anchor.href = "https://example.com/?" + input;
    var query = anchor.search.slice(1); // drop the leading "?"
    assert_equals(query, output);
  }, title);
}

encode("s", "s", "very basic")
encode("\u20AC", "%80", "Euro")
encode("\u4E02", "%81@", "character")
encode("\uE4C6", "%A1@", "PUA")
encode("\uE4C5", "%FE%FE", "PUA #2")
encode("\ud83d\udca9", "%26%23128169%3B", "poo")
</script>
14 changes: 14 additions & 0 deletions test/fixtures/wpt/encoding/idlharness.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// META: global=window,worker
// META: script=/resources/WebIDLParser.js
// META: script=/resources/idlharness.js

// Run the IDL harness for the 'encoding' spec, supplying one instance
// expression for each of the two interfaces.
idl_test(
  ['encoding'],
  [], // No deps
  idl_array => {
    const instances = {
      TextEncoder: ['new TextEncoder()'],
      TextDecoder: ['new TextDecoder()']
    };
    idl_array.add_objects(instances);
  }
);
50 changes: 50 additions & 0 deletions test/fixtures/wpt/encoding/iso-2022-jp-decoder.any.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Registers a test that copies the byte values in `input` into an Int8Array,
// decodes it with an "iso-2022-jp" TextDecoder and expects `output`.
// `desc` is appended to the test name.
function decode(input, output, desc) {
  var title = "iso-2022-jp decoder: " + desc;

  test(function() {
    var decoder = new TextDecoder("iso-2022-jp");

    // Typed array of the same length as the input, backed by its own buffer.
    var bytes = new Int8Array(input.length);
    for (var index = 0; index < input.length; index += 1) {
      bytes[index] = input[index];
    }

    assert_equals(decoder.decode(bytes), output);
  }, title);
}
// Test vectors: decode(bytes, expected text, description).
// 0x1b is ESC; the bytes after it select a state, e.g. 0x28 0x42 is "( B".

// No state selected yet: malformed escapes, the ASCII escape, plain bytes,
// and the SO / SI bytes (0x0E / 0x0F).
decode([0x1b, 0x24], "�$", "Error ESC")
decode([0x1b, 0x24, 0x50], "�$P", "Error ESC, character")
decode([0x1b, 0x28, 0x42, 0x50], "P", "ASCII ESC, character")
decode([0x1b, 0x28, 0x42, 0x1b, 0x28, 0x42, 0x50], "�P", "Double ASCII ESC, character")
decode([0x50, 0x1b, 0x28, 0x42, 0x50], "PP", "character, ASCII ESC, character")
decode([0x5C, 0x5D, 0x7E], "\\]~", "characters")
decode([0x0D, 0x0E, 0x0F, 0x10], "\x0D��\x10", "SO / SI")

// After ESC ( J, described below as the Roman escape.
decode([0x1b, 0x28, 0x4A, 0x5C, 0x5D, 0x7E], "¥]‾", "Roman ESC, characters")
decode([0x1b, 0x28, 0x4A, 0x0D, 0x0E, 0x0F, 0x10], "\x0D��\x10", "Roman ESC, SO / SI")
decode([0x1b, 0x28, 0x4A, 0x1b, 0x1b, 0x28, 0x49, 0x50], "�ミ", "Roman ESC, error ESC, Katakana ESC")

// After ESC ( I, described below as the Katakana escape.
decode([0x1b, 0x28, 0x49, 0x50], "ミ", "Katakana ESC, character")
decode([0x1b, 0x28, 0x49, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Katakana ESC, multibyte ESC, character")
decode([0x1b, 0x28, 0x49, 0x1b, 0x50], "�ミ", "Katakana ESC, error ESC, character")
decode([0x1b, 0x28, 0x49, 0x1b, 0x24, 0x50], "�、ミ", "Katakana ESC, error ESC #2, character")
decode([0x1b, 0x28, 0x49, 0x50, 0x1b, 0x28, 0x49, 0x50], "ミミ", "Katakana ESC, character, Katakana ESC, character")
decode([0x1b, 0x28, 0x49, 0x0D, 0x0E, 0x0F, 0x10], "����", "Katakana ESC, SO / SI")

// After ESC $ @ or ESC $ B, described below as the multibyte escapes.
decode([0x1b, 0x24, 0x40, 0x50, 0x50], "佩", "Multibyte ESC, character")
decode([0x1b, 0x24, 0x42, 0x50, 0x50], "佩", "Multibyte ESC #2, character")
decode([0x1b, 0x24, 0x42, 0x1b, 0x50, 0x50], "�佩", "Multibyte ESC, error ESC, character")
decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x40], "�", "Double multibyte ESC")
decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Double multibyte ESC, character")
decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x42, 0x50, 0x50], "�佩", "Double multibyte ESC #2, character")
decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x50, 0x50], "�ば�", "Multibyte ESC, error ESC #2, character")

// Multibyte escape followed by a stray single byte or an erroneous
// lead / trail byte.
decode([0x1b, 0x24, 0x40, 0x50, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Multibyte ESC, single byte, multibyte ESC, character")
decode([0x1b, 0x24, 0x40, 0x20, 0x50], "��", "Multibyte ESC, lead error byte")
decode([0x1b, 0x24, 0x40, 0x50, 0x20], "�", "Multibyte ESC, trail error byte")

// Input that ends with a complete or incomplete escape sequence, or an
// error escape followed by one more byte.
decode([0x50, 0x1b], "P�", "character, error ESC")
decode([0x50, 0x1b, 0x24], "P�$", "character, error ESC #2")
decode([0x50, 0x1b, 0x50], "P�P", "character, error ESC #3")
decode([0x50, 0x1b, 0x28, 0x42], "P", "character, ASCII ESC")
decode([0x50, 0x1b, 0x28, 0x4A], "P", "character, Roman ESC")
decode([0x50, 0x1b, 0x28, 0x49], "P", "character, Katakana ESC")
decode([0x50, 0x1b, 0x24, 0x40], "P", "character, Multibyte ESC")
decode([0x50, 0x1b, 0x24, 0x42], "P", "character, Multibyte ESC #2")
19 changes: 19 additions & 0 deletions test/fixtures/wpt/encoding/iso-2022-jp-encoder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!doctype html>
<meta charset=iso-2022-jp> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<div id=log></div>
<script>
// Registers a test that puts `input` into the query of an <a> element's URL
// and expects the serialized query to equal `output`. `desc` is appended to
// the test name.
function encode(input, output, desc) {
  var title = "iso-2022-jp encoder: " + desc;

  test(function() {
    // <a> uses document encoding for URL's query
    var anchor = document.createElement("a");
    anchor.href = "https://example.com/?" + input;
    var query = anchor.search.slice(1); // drop the leading "?"
    assert_equals(query, output);
  }, title);
}

encode("s", "s", "very basic")
encode("\u00A5\u203Es\\\uFF90\u4F69", "%1B(J\\~s%1B(B\\%1B$B%_PP%1B(B", "basics")
encode("\x0E\x0F\x1Bx", "%0E%0F%1Bx", "SO/SI ESC")
encode("\uFFFD", "%26%2365533%3B", "U+FFFD");
</script>
Loading