revert: fix(ngSanitize): follow HTML parser rules for start tags / allow < in text content
This reverts commit 36d2658b94.
This commit broke the ci-checks task when ported into v1.2.x — I will sort this out shortly.
This commit is contained in:
@@ -154,11 +154,11 @@ function sanitizeText(chars) {
|
||||
|
||||
// Regular Expressions for parsing tags and attributes
|
||||
var START_TAG_REGEXP =
|
||||
/^<((?:[a-zA-Z])[\w:-]*)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*(>?)/,
|
||||
END_TAG_REGEXP = /^<\/\s*([\w:-]+)[^>]*>/,
|
||||
/^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/,
|
||||
END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/,
|
||||
ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g,
|
||||
BEGIN_TAG_REGEXP = /^</,
|
||||
BEGING_END_TAGE_REGEXP = /^<\//,
|
||||
BEGING_END_TAGE_REGEXP = /^<\s*\//,
|
||||
COMMENT_REGEXP = /<!--(.*?)-->/g,
|
||||
DOCTYPE_REGEXP = /<!DOCTYPE([^>]*?)>/i,
|
||||
CDATA_REGEXP = /<!\[CDATA\[(.*?)]]>/g,
|
||||
@@ -232,11 +232,10 @@ function makeMap(str) {
|
||||
* @param {object} handler
|
||||
*/
|
||||
function htmlParser( html, handler ) {
|
||||
var index, chars, match, stack = [], last = html, text;
|
||||
var index, chars, match, stack = [], last = html;
|
||||
stack.last = function() { return stack[ stack.length - 1 ]; };
|
||||
|
||||
while ( html ) {
|
||||
text = '';
|
||||
chars = true;
|
||||
|
||||
// Make sure we're not in a script or style element
|
||||
@@ -275,23 +274,16 @@ function htmlParser( html, handler ) {
|
||||
match = html.match( START_TAG_REGEXP );
|
||||
|
||||
if ( match ) {
|
||||
// We only have a valid start-tag if there is a '>'.
|
||||
if ( match[4] ) {
|
||||
html = html.substring( match[0].length );
|
||||
match[0].replace( START_TAG_REGEXP, parseStartTag );
|
||||
}
|
||||
html = html.substring( match[0].length );
|
||||
match[0].replace( START_TAG_REGEXP, parseStartTag );
|
||||
chars = false;
|
||||
} else {
|
||||
// no ending tag found --- this piece should be encoded as an entity.
|
||||
text += '<';
|
||||
html = html.substring(1);
|
||||
}
|
||||
}
|
||||
|
||||
if ( chars ) {
|
||||
index = html.indexOf("<");
|
||||
|
||||
text += index < 0 ? html : html.substring( 0, index );
|
||||
var text = index < 0 ? html : html.substring( 0, index );
|
||||
html = index < 0 ? "" : html.substring( index );
|
||||
|
||||
if (handler.chars) handler.chars( decodeEntities(text) );
|
||||
|
||||
@@ -21,7 +21,6 @@ describe('HTML', function() {
|
||||
|
||||
var handler, start, text, comment;
|
||||
beforeEach(function() {
|
||||
text = "";
|
||||
handler = {
|
||||
start: function(tag, attrs, unary){
|
||||
start = {
|
||||
@@ -36,7 +35,7 @@ describe('HTML', function() {
|
||||
});
|
||||
},
|
||||
chars: function(text_){
|
||||
text += text_;
|
||||
text = text_;
|
||||
},
|
||||
end:function(tag) {
|
||||
expect(tag).toEqual(start.tag);
|
||||
@@ -82,31 +81,8 @@ describe('HTML', function() {
|
||||
expect(text).toEqual('text');
|
||||
});
|
||||
|
||||
it('should not treat "<" followed by a non-/ or non-letter as a tag', function() {
|
||||
expectHTML('<- text1 text2 <1 text1 text2 <{', handler).
|
||||
toBe('<- text1 text2 <1 text1 text2 <{');
|
||||
});
|
||||
|
||||
it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() {
|
||||
expect(function() {
|
||||
htmlParser('foo </ bar', handler);
|
||||
}).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: </ bar');
|
||||
});
|
||||
|
||||
it('should throw badparse if text content contains "<" followed by an ASCII letter without matching ">"', function() {
|
||||
expect(function() {
|
||||
htmlParser('foo <a bar', handler);
|
||||
}).toThrowMinErr('$sanitize', 'badparse', 'The sanitizer was unable to parse the following block of html: <a bar');
|
||||
});
|
||||
|
||||
it('should accept tag delimiters such as "<" inside real tags', function() {
|
||||
// Assert that the < is part of the text node content, and not part of a tag name.
|
||||
htmlParser('<p> 10 < 100 </p>', handler);
|
||||
expect(text).toEqual(' 10 < 100 ');
|
||||
});
|
||||
|
||||
it('should parse newlines in tags', function() {
|
||||
htmlParser('<tag\n attr="value"\n>text</\ntag\n>', handler);
|
||||
htmlParser('<\ntag\n attr="value"\n>text<\n/\ntag\n>', handler);
|
||||
expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false});
|
||||
expect(text).toEqual('text');
|
||||
});
|
||||
@@ -147,9 +123,8 @@ describe('HTML', function() {
|
||||
expectHTML('a<!DocTyPe html>c.').toEqual('ac.');
|
||||
});
|
||||
|
||||
it('should escape non-start tags', function() {
|
||||
expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').
|
||||
toBe('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.');
|
||||
it('should remove nested script', function() {
|
||||
expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.');
|
||||
});
|
||||
|
||||
it('should remove attrs', function() {
|
||||
@@ -190,16 +165,14 @@ describe('HTML', function() {
|
||||
expectHTML(everything).toEqual(everything);
|
||||
});
|
||||
|
||||
it('should mangle improper html', function() {
|
||||
// This text is encoded more than a real HTML parser would, but it should render the same.
|
||||
it('should handle improper html', function() {
|
||||
expectHTML('< div rel="</div>" alt=abc dir=\'"\' >text< /div>').
|
||||
toBe('< div rel="" alt=abc dir=\'"\' >text< /div>');
|
||||
toEqual('<div rel="</div>" alt="abc" dir=""">text</div>');
|
||||
});
|
||||
|
||||
it('should mangle improper html2', function() {
|
||||
// A proper HTML parser would clobber this more in most cases, but it looks reasonable.
|
||||
it('should handle improper html2', function() {
|
||||
expectHTML('< div rel="</div>" / >').
|
||||
toBe('< div rel="" / >');
|
||||
toEqual('<div rel="</div>"/>');
|
||||
});
|
||||
|
||||
it('should ignore back slash as escape', function() {
|
||||
@@ -222,12 +195,6 @@ describe('HTML', function() {
|
||||
expectHTML('\na\n').toEqual(' a ');
|
||||
});
|
||||
|
||||
it('should accept tag delimiters such as "<" inside real tags (with nesting)', function() {
|
||||
//this is an integrated version of the 'should accept tag delimiters such as "<" inside real tags' test
|
||||
expectHTML('<p> 10 < <span>100</span> </p>')
|
||||
.toEqual('<p> 10 < <span>100</span> </p>');
|
||||
});
|
||||
|
||||
describe('htmlSanitizerWriter', function() {
|
||||
/* global htmlSanitizeWriter: false */
|
||||
if (angular.isUndefined(window.htmlSanitizeWriter)) return;
|
||||
|
||||
Reference in New Issue
Block a user