I had a case where I needed to check partial HTML code for unmatched and malformed tags (mostly things like </br>, a common error in my samples), and the various heavy-duty validators were overkill for that. So I ended up writing my own custom validation routine in PHP, which is pasted below. You may need to use mb_substr instead of index-based character retrieval if you have text in different languages. Note that it does not parse CDATA or script/style tags, but it can be extended easily:
/**
 * Lightweight well-formedness check for a fragment of HTML.
 *
 * Scans the markup byte by byte and verifies that every non-void element is
 * closed in the right order, that void elements (br, img, ...) are never
 * written as closing tags, and that tags and comments are terminated.
 *
 * Limitations: CDATA sections and the raw-text content of script/style
 * elements are not treated specially, and any other "<!...>" / "<?...>"
 * construct (e.g. a doctype) is rejected because it has no tag name.
 * A stray ">" in text content is also reported as an error.
 *
 * @param string $html HTML fragment to check.
 * @return bool True when the fragment is balanced and well formed, false otherwise.
 */
function check_html($html)
{
    // Void elements: they never take a closing tag and are never pushed on the stack.
    $autoclosed = array(
        'br', 'hr', 'input', 'embed', 'img', 'meta', 'link',
        'param', 'source', 'track', 'area', 'base', 'col', 'wbr',
    );
    // Same character set PCRE's \s matches in non-UTF mode.
    $whitespace = " \t\n\r\v\f";
    // Same character set as the case-insensitive class [a-z0-9-].
    $namechars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-';

    $stack = array();
    $l = strlen($html);
    $i = 0;
    $incomment = false;
    $intag = false;

    while ($i < $l) {
        $i += strspn($html, $whitespace, $i);
        if ($i >= $l) {
            break;
        }

        if ($incomment) {
            // Nothing inside a comment matters except its terminator.
            $end = strpos($html, '-->', $i);
            if (false === $end) {
                return false; // unterminated comment
            }
            $incomment = false;
            $i = $end + 3;
            continue;
        }

        $c = $html[$i++];

        if ($intag && ('"' === $c || "'" === $c)) {
            // Quoted attribute value: jump to the matching quote so that any
            // '<' or '>' inside the value is not mistaken for markup.
            $end = strpos($html, $c, $i);
            if (false === $end) {
                return false; // unterminated attribute value
            }
            $i = $end + 1;
            continue;
        }

        if ('<' === $c) {
            if ($intag) {
                return false; // '<' inside an unfinished tag
            }
            if ('!--' === substr($html, $i, 3)) {
                $incomment = true;
                $i += 3;
                continue;
            }

            $intag = true;
            // Bounds check first: the input may end right after '<'.
            $closetag = ($i < $l && '/' === $html[$i]);
            if ($closetag) {
                $i++;
            }

            $n = ($i < $l) ? strspn($html, $namechars, $i) : 0;
            if (0 === $n) {
                return false; // '<' not followed by a tag name
            }
            $tag = strtolower(substr($html, $i, $n));
            $i += $n;

            if ($i < $l) {
                // The name must be followed by whitespace, '/' or '>'.
                if (false === strpos($whitespace . '/>', $html[$i])) {
                    return false;
                }
                if ($closetag) {
                    // Reject "</tag/>" and "</tag />". Only the text directly
                    // after the name is inspected, never later lines.
                    $j = $i + strspn($html, $whitespace, $i);
                    if ('/>' === substr($html, $j, 2)) {
                        return false;
                    }
                }
            }

            if ($closetag) {
                // A closing tag must match the innermost open element and
                // must not name a void element (e.g. "</br>").
                if (in_array($tag, $autoclosed, true) || array_pop($stack) !== $tag) {
                    return false;
                }
            } elseif (!in_array($tag, $autoclosed, true)) {
                $stack[] = $tag;
            }
        } elseif ('>' === $c) {
            if (!$intag) {
                return false; // stray '>' outside any tag
            }
            $intag = false;
        }
    }

    return !$incomment && !$intag && empty($stack);
}