4

So I have this function to strip out scripts from the page, but some scripts that are many lines long are still showing up. Is there a way to remove all scripts from the page that loads?

 /**
  * Strips body tags, line breaks, HTML comments, <noscript> blocks and
  * <script> elements from an HTML string.
  *
  * All tag patterns are case-insensitive and anchored with \b so that
  * look-alike tag names (e.g. <bodyguard>) are left alone.
  *
  * @param {string} data - Raw HTML markup.
  * @returns {string} The markup with the unwanted parts removed.
  */
 function filterData(data){
   return data
     // no body tags: the optional slash must follow "<" so that both the
     // opening <body ...> and the closing </body> tag are matched
     .replace(/<\/?body\b[^>]*>/gi, '')
     // no linebreaks: inside a character class "|" is a literal pipe, so it
     // must not be listed or pipes would be stripped from the content
     .replace(/[\r\n]+/g, '')
     // no comments: HTML comments open with "<!--"
     .replace(/<!--[\S\s]*?-->/g, '')
     // no noscript blocks
     .replace(/<noscript\b[^>]*>[\S\s]*?<\/noscript\s*>/gi, '')
     // no script blocks
     .replace(/<script\b[^>]*>[\S\s]*?<\/script\s*>/gi, '')
     // no self closing scripts: [^>]* keeps the match inside a single tag
     // (a greedy ".*" would run on to the last "/>" in the document), and
     // the g flag removes every occurrence instead of only the first
     .replace(/<script\b[^>]*\/>/gi, '');

   // [... add as needed ...]
 }

Here is an example of a script that still comes through in the HTML:

<script type="text/javascript">
var ccKeywords="keyword=";
if (typeof(ccauds) != 'undefined')
{
 for (var cci = 0; cci < ccauds.Profile.Audiences.Audience.length; cci++)
{
  if (cci > 0) ccKeywords += "&keyword="; ccKeywords +=     ccauds.Profile.Audiences.Audience[cci].abbr;
}
}
</script>
3
  • <script type="text/javascript"> var ccKeywords="keyword="; if (typeof(ccauds) != 'undefined') { for (var cci = 0; cci < ccauds.Profile.Audiences.Audience.length; cci++) { if (cci > 0) ccKeywords += "&keyword="; ccKeywords += ccauds.Profile.Audiences.Audience[cci].abbr; } } </script> Commented Jun 12, 2012 at 15:54
  • Blynn, please put that in your original question, not in a comment. Commented Jun 12, 2012 at 15:56
  • I must admit I only ever use jQuery for this kind of thing... data = $("<div>" + data + "</div>").find('script').remove().end().html() Commented Jun 12, 2012 at 16:04

3 Answers 3

2

If I got you right, you need to remove all <script> tags, together with their inner code, from a piece of HTML string. In this case you can try the following regular expression:

// Returns the cleaned string; `data` itself is not modified.
// [^>]* (instead of .*?) lets the opening tag span several lines, since "."
// does not match line breaks; \b keeps look-alike tag names from matching.
data.replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/ig, "");

It should successfully work with one-liners and multi-liners, and does not affect other tags.

DEMO: http://jsfiddle.net/9jBSD/

Sign up to request clarification or add additional context in comments.

Comments

0

Check out Sugar.js - http://sugarjs.com/

It has a removeTags method that should do what you want:

http://sugarjs.com/api/String/removeTags

Comments

0
/**
 * Cleans an HTML string by parsing it into a detached <body> element and
 * removing script/noscript elements, "on*" attributes, comment nodes and
 * whitespace-only text nodes.
 *
 * Relies on the global jQuery object `$` and on `document`.
 *
 * @param {string} data - HTML markup to clean.
 * @returns {string} The innerHTML of the cleaned tree.
 */
function filterData(data){
    // The element is never attached to the page; it only serves as a parser.
    var root = document.createElement("body");
    root.innerHTML = data;

    $(root).find("script,noscript").remove();

    // Removes every attribute whose name starts with "on" (onclick, onload, ...).
    function removeAttrs( node ) {
        var attrs = node.attributes;
        // node.attributes is a live collection: removing an entry shifts the
        // following ones down. Walking it from the end keeps every index
        // valid, so no attribute is skipped and no undefined entry is read.
        for( var i = attrs.length - 1; i >= 0; i-- ) {
            var name = attrs[i].name;
            if( name.toLowerCase().indexOf("on") === 0 ) {
                node.removeAttribute(name);
            }
        }
    }

    // Recursively cleans `node` and all of its descendants.
    function walk( node ) {
        removeAttrs(node);
        // $() copies the child list, so removing nodes inside the loop is safe.
        $( node.childNodes ).each( function() {
            if( this.nodeType === 3 ) {
                // text node: drop it when it holds nothing but whitespace
                if( !$.trim( this.nodeValue ).length ) {
                    $(this).remove();
                }
            }
            else if( this.nodeType === 8 ) {
                // comment node
                $(this).remove();
            }
            else if( this.nodeType === 1 ) {
                // element node: descend
                walk(this);
            }
        });
    }

    walk(root);

    return root.innerHTML;
}

// Example input: a script block, stray closing tags, an element with an
// onclick attribute, blank lines and an HTML comment.
filterData("<script>alert('hello');</script></noscript></script><div onclick='alert'>hello</div>\n\n<!-- comment -->");
// Result reported by the answer's author: "<div>hello</div>"

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.