From 3d0b49c07f10c0a723c91629c63705647b690d81 Mon Sep 17 00:00:00 2001 From: Yutaka Yamaguchi Date: Sun, 30 Mar 2014 06:45:36 +0900 Subject: [PATCH] fix(ngSanitize): encode surrogate pair properly The encodeEntities function encodes non-alphanumeric characters to entities with charCodeAt. charCodeAt does not return a single value for characters whose Unicode code points are higher than 65,535. Such characters are stored as a surrogate pair, and charCodeAt returns each half separately, which is why Emoji, which have higher code points, are garbled. We need to handle them properly. Closes #5088 Closes #6911 --- src/ngSanitize/sanitize.js | 6 ++++++ test/ngSanitize/sanitizeSpec.js | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index 38d088bbe..74441b3bf 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -161,6 +161,7 @@ var START_TAG_REGEXP = COMMENT_REGEXP = //g, DOCTYPE_REGEXP = /]*?)>/i, CDATA_REGEXP = //g, + SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g, // Match everything outside of normal chars and " (quote character) NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; @@ -399,6 +400,11 @@ function decodeEntities(value) { function encodeEntities(value) { return value. replace(/&/g, '&'). + replace(SURROGATE_PAIR_REGEXP, function (value) { + var hi = value.charCodeAt(0); + var low = value.charCodeAt(1); + return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';'; + }). replace(NON_ALPHANUMERIC_REGEXP, function(value){ return '&#' + value.charCodeAt(0) + ';'; }). diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index fbffbba5c..a086d2f75 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -239,6 +239,11 @@ describe('HTML', function() { expect(html).toEqual('
'); }); + it('should handle surrogate pair', function() { + writer.chars(String.fromCharCode(55357, 56374)); + expect(html).toEqual('🐶'); + }); + describe('explicitly disallow', function() { it('should not allow attributes', function() { writer.start('div', {id:'a', name:'a', style:'a'});