Parsing HTML to find Links

Method 1: Using a regular expression

	// Extract every <a> element that has a double-quoted href from $result and
	// print "href - inner markup" per link, HTML-escaped.
	//   group 1: the href value (text between the double quotes)
	//   group 2: everything between the opening <a ...> tag and the next </a>
	// "(?:[^>]*\s)?" allows other attributes before href while still matching
	// when href is the first attribute (<a href="...">); href must always be
	// preceded by whitespace, so attributes such as data-href are not matched.
	$regexp = "<a\s(?:[^>]*\s)?href\s*=\s*\"([^\"]*)\"[^>]*>(.*)<\/a>"; //better version
	// Modifiers: m = multiline, s = dot also matches newlines,
	// i = case-insensitive, U = quantifiers are ungreedy by default.
	if(preg_match_all("/$regexp/msiU", $result, $matches, PREG_SET_ORDER)) {
		foreach($matches as $match){
			echo htmlspecialchars($match[1]);
			echo " - ";
			echo htmlspecialchars($match[2]);
			echo "<br />\r\n";
		}
	}


Method 2: DOM

// Parse the HTML held in $result with DOMDocument and print the href of every
// <a> element, one per line.

//handle utf-8 encoding
// mb_detect_encoding() returns false when it cannot decide; skip the
// re-encoding in that case instead of passing false as the source encoding.
$detectedEncoding = mb_detect_encoding($result);
if ($detectedEncoding !== false) {
    $result = mb_convert_encoding($result, 'utf-8', $detectedEncoding);
}
// Turn non-ASCII characters into HTML entities so DOMDocument::loadHTML()
// does not misread multibyte text.
// NOTE(review): the 'html-entities' mbstring encoding is deprecated as of PHP 8.2 — confirm target version.
$result = mb_convert_encoding($result, 'html-entities', 'utf-8');

//load the html string into the DOMDocument
// Real-world markup is rarely valid; collect libxml parse errors internally
// rather than emitting a warning for each one, then restore the old setting.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$DOM = new DOMDocument();
$DOM->loadHTML($result);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

//get a list of all <A> tags | header link for hkgolden
$a = $DOM->getElementsByTagName('a');

//loop through all <A> tags
foreach($a as $link){
    //echo out the href attribute of the <A> tag.
    // Escaped because the value comes from parsed (untrusted) markup and is
    // written into HTML output.
    echo htmlspecialchars($link->getAttribute('href')).'<br />';
}



