fix(ngSanitize): encode surrogate pair properly
The encodeEntities function encodes non-alphanumeric characters as entities using charCodeAt. charCodeAt does not return a single value for characters whose Unicode code points are higher than 65,535 (U+FFFF); such characters are stored as a surrogate pair, which is why emoji with higher code points were garbled. We need to handle them properly. Closes #5088 Closes #6911
This commit is contained in:
committed by
Caitlin Potter
parent
b6aec5642e
commit
3d0b49c07f
@@ -161,6 +161,7 @@ var START_TAG_REGEXP =
|
||||
COMMENT_REGEXP = /<!--(.*?)-->/g,
|
||||
DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
|
||||
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
|
||||
SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
|
||||
// Match everything outside of normal chars and " (quote character)
|
||||
NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
|
||||
|
||||
@@ -399,6 +400,11 @@ function decodeEntities(value) {
|
||||
function encodeEntities(value) {
|
||||
return value.
|
||||
replace(/&/g, '&amp;').
|
||||
replace(SURROGATE_PAIR_REGEXP, function (value) {
|
||||
var hi = value.charCodeAt(0);
|
||||
var low = value.charCodeAt(1);
|
||||
return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
|
||||
}).
|
||||
replace(NON_ALPHANUMERIC_REGEXP, function(value){
|
||||
return '&#' + value.charCodeAt(0) + ';';
|
||||
}).
|
||||
|
||||
@@ -239,6 +239,11 @@ describe('HTML', function() {
|
||||
expect(html).toEqual('<div>');
|
||||
});
|
||||
|
||||
it('should handle surrogate pair', function() {
|
||||
writer.chars(String.fromCharCode(55357, 56374));
|
||||
expect(html).toEqual('&#128054;');
|
||||
});
|
||||
|
||||
describe('explicitly disallow', function() {
|
||||
it('should not allow attributes', function() {
|
||||
writer.start('div', {id:'a', name:'a', style:'a'});
|
||||
|
||||
Reference in New Issue
Block a user