ADB DIY RSS

So I was thinking, wouldn’t it be nice if the Australian Dictionary of Biography’s ‘born on this day’ feature could be made available as an RSS feed? Every morning you’d get a new list of biographies delivered direct to your feed reader. And so…

[sounds of xpath wrangling and PHP coding]

here it is.

It’s pretty simple – it harvests all the links of people born on the current day, then loops through the links to gather the first paragraph of each biography. Then it’s just a matter of writing everything to an RSS file.

In case you missed it, I also created a Media RSS feed for portrait images used in the ADB. This enables them to be viewed in CoolIris.

Code follows…

<?php
/**
 * Fetch a URL through an existing cURL handle.
 *
 * The URL is set on the handle and the transfer is executed. Whatever
 * curl_exec() yields is handed back when it is truthy; the calling script
 * sets CURLOPT_RETURNTRANSFER on the handle, so that is the response body.
 *
 * On a falsy result (a failed transfer, or an empty body) the cURL error
 * number and message are echoed and the script terminates.
 *
 * @param string $url Address to request.
 * @param mixed  $ch  cURL handle created with curl_init() and configured by the caller.
 * @return string The result of curl_exec() for the request.
 */
function getPage($url, $ch) {
	curl_setopt($ch, CURLOPT_URL, $url);
	$response = curl_exec($ch);
	if ($response) {
		return $response;
	}
	// Nothing usable came back: report what cURL knows and stop.
	echo "cURL error number:" . curl_errno($ch);
	echo "cURL error:" . curl_error($ch);
	exit;
}
// Index page that lists the people born (and died) on the current day.
$url = "http://www.adb.online.anu.edu.au/scripts/adbp-births-deaths.php";
$userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

// One cURL handle is configured here and reused for every request the
// script makes (the index page now, each biography page later).
$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
curl_setopt($ch, CURLOPT_FAILONERROR, true);    // treat HTTP >= 400 as a failed transfer
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // curl_exec() returns the body instead of printing it
curl_setopt($ch, CURLOPT_TIMEOUT, 10);
$html = getPage($url, $ch);

// Real-world HTML is rarely well formed. Have libxml collect its parse
// complaints internally instead of raising PHP warnings (this replaces the
// blanket @ suppression), then discard them once the page is loaded.
libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();

// Anchors inside the 'pb-results' list; presumably this is the births
// list on the page -- confirm against the live markup. Each anchor's text
// is the person's name, so no separate text() query is needed.
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("//ul[@class='pb-results'][1]/li/a");

// Emit the feed as RSS 2.0. A channel must carry <title>, <link> and
// <description>, and the document has to close both <channel> and <rss>
// to be well-formed XML.
echo "<?xml version='1.0'?>\n";
echo "<rss version='2.0'>\n";
echo "<channel>\n";
echo "<title>Australian Dictionary of Biography: born on this day</title>\n";
echo "<link>http://www.adb.online.anu.edu.au/scripts/adbp-births-deaths.php</link>\n";
echo "<description>A list of all those people in the Australian Dictionary of Biography who were born on this day.</description>\n";

// Keep libxml's HTML parse complaints out of the output without using @.
libxml_use_internal_errors(true);

// $hrefs holds the <a> elements harvested from the index page and $ch is
// the shared cURL handle; both are set up earlier in the script.
foreach ($hrefs as $href) {
	// The anchor text is the person's name. Escape it: characters such as
	// & are not legal in XML character data.
	$title = htmlspecialchars($href->nodeValue, ENT_QUOTES);

	// Drop the first two characters of the href (presumably a leading
	// ".." -- confirm against the live page) and make the link absolute.
	$bioUrl = "http://www.adb.online.anu.edu.au" . substr($href->getAttribute('href'), 2);

	// NOTE: getPage() terminates the script if the fetch fails.
	$bioDom = new DOMDocument();
	$bioDom->loadHTML(getPage($bioUrl, $ch));
	libxml_clear_errors();

	// Gather the direct text nodes of the first paragraph of the content div.
	$bioXpath = new DOMXPath($bioDom);
	$bio = "";
	foreach ($bioXpath->evaluate("//div[@id='content']/p[1]/text()") as $para) {
		$bio .= $para->nodeValue;
	}

	// Collapse line breaks and runs of blanks into single spaces. The
	// pattern is double-quoted on purpose: a single-quoted '\n' is a
	// literal backslash followed by "n" and never matches a newline.
	$bio = trim(preg_replace("/[\r\n\t ]+/", ' ', $bio));
	$bio = htmlspecialchars($bio . "...", ENT_QUOTES);

	// URLs may contain & in a query string, so the link is escaped too.
	$link = htmlspecialchars($bioUrl, ENT_QUOTES);

	echo "<item>\n";
	echo "<title>$title</title>\n";
	echo "<link>$link</link>\n";
	echo "<description>$bio</description>\n";
	echo "</item>\n";
}
echo "</channel>\n";
echo "</rss>\n";
?>
Share this:
  • Digg
  • del.icio.us
  • Facebook
  • Google Bookmarks
  • StumbleUpon
  • Tumblr

Post a Comment

Your email is never published nor shared. Required fields are marked *