From 25d3d3730d2965f6462df58f66e0928fbf304268 Mon Sep 17 00:00:00 2001 From: Caitlin Potter Date: Wed, 16 Jul 2014 18:13:17 -0400 Subject: [PATCH] revert: fix(ngSanitize): follow HTML parser rules for start tags / allow < in text content This reverts commit 36d2658b94b753d0e7d93af1e923ac5dbf74423b. This commit broke the ci-checks task when ported into v1.2.x --- I will sort this out shortly. --- src/ngSanitize/sanitize.js | 22 +++++---------- test/ngSanitize/sanitizeSpec.js | 49 ++++++--------------------------- 2 files changed, 15 insertions(+), 56 deletions(-) diff --git a/src/ngSanitize/sanitize.js b/src/ngSanitize/sanitize.js index de6dd3dd1..fae50aac2 100644 --- a/src/ngSanitize/sanitize.js +++ b/src/ngSanitize/sanitize.js @@ -154,11 +154,11 @@ function sanitizeText(chars) { // Regular Expressions for parsing tags and attributes var START_TAG_REGEXP = - /^<((?:[a-zA-Z])[\w:-]*)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*(>?)/, - END_TAG_REGEXP = /^<\/\s*([\w:-]+)[^>]*>/, + /^<\s*([\w:-]+)((?:\s+[\w:-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/, + END_TAG_REGEXP = /^<\s*\/\s*([\w:-]+)[^>]*>/, ATTR_REGEXP = /([\w:-]+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g, BEGIN_TAG_REGEXP = /^/g, DOCTYPE_REGEXP = /]*?)>/i, CDATA_REGEXP = //g, @@ -232,11 +232,10 @@ function makeMap(str) { * @param {object} handler */ function htmlParser( html, handler ) { - var index, chars, match, stack = [], last = html, text; + var index, chars, match, stack = [], last = html; stack.last = function() { return stack[ stack.length - 1 ]; }; while ( html ) { - text = ''; chars = true; // Make sure we're not in a script or style element @@ -275,23 +274,16 @@ function htmlParser( html, handler ) { match = html.match( START_TAG_REGEXP ); if ( match ) { - // We only have a valid start-tag if there is a '>'. - if ( match[4] ) { - html = html.substring( match[0].length ); - match[0].replace( START_TAG_REGEXP, parseStartTag ); - } + html = html.substring( match[0].length ); + match[0].replace( START_TAG_REGEXP, parseStartTag ); chars = false; - } else { - // no ending tag found --- this piece should be encoded as an entity. - text += '<'; - html = html.substring(1); } } if ( chars ) { index = html.indexOf("<"); - text += index < 0 ? html : html.substring( 0, index ); + var text = index < 0 ? html : html.substring( 0, index ); html = index < 0 ? "" : html.substring( index ); if (handler.chars) handler.chars( decodeEntities(text) ); diff --git a/test/ngSanitize/sanitizeSpec.js b/test/ngSanitize/sanitizeSpec.js index 5f2594397..76a480875 100644 --- a/test/ngSanitize/sanitizeSpec.js +++ b/test/ngSanitize/sanitizeSpec.js @@ -21,7 +21,6 @@ describe('HTML', function() { var handler, start, text, comment; beforeEach(function() { - text = ""; handler = { start: function(tag, attrs, unary){ start = { @@ -36,7 +35,7 @@ describe('HTML', function() { }); }, chars: function(text_){ - text += text_; + text = text_; }, end:function(tag) { expect(tag).toEqual(start.tag); @@ -82,31 +81,8 @@ describe('HTML', function() { expect(text).toEqual('text'); }); - it('should not treat "<" followed by a non-/ or non-letter as a tag', function() { - expectHTML('<- text1 text2 <1 text1 text2 <{', handler). - toBe('<- text1 text2 <1 text1 text2 <{'); - }); - - it('should throw badparse if text content contains "<" followed by "/" without matching ">"', function() { - expect(function() { - htmlParser('foo "', function() { - expect(function() { - htmlParser('foo 10 < 100

', handler); - expect(text).toEqual(' 10 < 100 '); - }); - it('should parse newlines in tags', function() { - htmlParser('text', handler); + htmlParser('<\ntag\n attr="value"\n>text<\n/\ntag\n>', handler); expect(start).toEqual({tag:'tag', attrs:{attr:'value'}, unary:false}); expect(text).toEqual('text'); }); @@ -147,9 +123,8 @@ describe('HTML', function() { expectHTML('ac.').toEqual('ac.'); }); - it('should escape non-start tags', function() { - expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.'). - toBe('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.'); + it('should remove nested script', function() { + expectHTML('a< SCRIPT >A< SCRIPT >evil< / scrIpt >B< / scrIpt >c.').toEqual('ac.'); }); it('should remove attrs', function() { @@ -190,16 +165,14 @@ describe('HTML', function() { expectHTML(everything).toEqual(everything); }); - it('should mangle improper html', function() { - // This text is encoded more than a real HTML parser would, but it should render the same. + it('should handle improper html', function() { expectHTML('< div rel="" alt=abc dir=\'"\' >text< /div>'). - toBe('< div rel="" alt=abc dir=\'"\' >text< /div>'); + toEqual('
text
'); }); - it('should mangle improper html2', function() { - // A proper HTML parser would clobber this more in most cases, but it looks reasonable. + it('should handle improper html2', function() { expectHTML('< div rel="" / >'). - toBe('< div rel="" / >'); + toEqual('
'); }); it('should ignore back slash as escape', function() { @@ -222,12 +195,6 @@ describe('HTML', function() { expectHTML('\na\n').toEqual(' a '); }); - it('should accept tag delimiters such as "<" inside real tags (with nesting)', function() { - //this is an integrated version of the 'should accept tag delimiters such as "<" inside real tags' test - expectHTML('

10 < 100

') - .toEqual('

10 < 100

'); - }); - describe('htmlSanitizerWriter', function() { /* global htmlSanitizeWriter: false */ if (angular.isUndefined(window.htmlSanitizeWriter)) return;