The `sources` table looks like:
Code: Select all
| Site | Feed |
The `posts` table looks like:
Code: Select all
| ID | Title | Site | Link | Date | Author | Category | Description | Content |
Parser:
Code: Select all
<?php
#-------------------#----------------------#
# htd0rg blogs # by drusepth #
#-------------------#----------------------#
# ported to php # 12/21/09 #
#-------------------#----------------------#
?>
<?php
# Connection settings for the MySQL server that holds the feed tables
$mysql_user     = 'user';
$mysql_password = 'pass';
$mysql_location = 'localhost';
$mysql_db       = 'db';

# Open the link; stop with a generic message when the server cannot be reached
$mysql = mysql_connect($mysql_location, $mysql_user, $mysql_password);
if (!$mysql) {
    die("Couldn't connect to mySQL right now. Try back later.");
}

# Select the working database; failure is reported with the same message
if (!mysql_select_db($mysql_db)) {
    die("Couldn't connect to mySQL right now. Try back later.");
}
?>
<?php
# Read the list of feeds to poll from the `sources` table
$query = "SELECT * FROM `sources`";
$result = mysql_query($query);
if (!$result) {
    die("Couldn't connect to mySQL right now. Try back later.");
}

# Build a map of site name => feed URL
$sources = array();
for ($row = mysql_fetch_object($result); $row; $row = mysql_fetch_object($result)) {
    $sources[$row->Site] = $row->Feed;
}
?>
<?php
# Main loop: download each feed, split it into lines and hand it to the parser
foreach ($sources as $blog_title => $blog_feed) {
    $raw_feed = getFeed($blog_feed);
    $feed = explode("\n", $raw_feed);
    parseFeed($blog_title, $feed);
}

# Every feed has been processed; release the database link
mysql_close($mysql);
?>
<?php
# Functions
/**
 * Download the document at the given URL with cURL.
 *
 * @param string $feed_url Address of the feed to fetch.
 * @return mixed The result of curl_exec() with CURLOPT_RETURNTRANSFER enabled.
 */
function getFeed($feed_url) {
    $handle = curl_init();
    # Ask cURL to hand the response back instead of printing it
    curl_setopt_array($handle, array(
        CURLOPT_URL            => $feed_url,
        CURLOPT_RETURNTRANSFER => 1,
    ));
    $body = curl_exec($handle);
    curl_close($handle);
    return $body;
}
/**
 * Translate a three-letter English month abbreviation into its number.
 *
 * @param string $name Abbreviation such as 'Jan' or 'Nov' (matched as written).
 * @return int|null Month number 1-12, or null when the name is not in the table.
 */
function getMonth($name) {
    # Month number => abbreviation; the array key is the value handed back
    $abbreviations = array(
        1  => 'Jan', 2  => 'Feb', 3  => 'Mar', 4  => 'Apr',
        5  => 'May', 6  => 'Jun', 7  => 'Jul', 8  => 'Aug',
        9  => 'Sep', 10 => 'Oct', 11 => 'Nov', 12 => 'Dec',
    );

    # Non-strict search, i.e. the same == comparison a manual scan would use
    $number = array_search($name, $abbreviations);
    if ($number === false) {
        return null;
    }
    return $number;
}
/**
 * Return the text inside a <![CDATA[ ... ]]> wrapper, or the value unchanged
 * when no such wrapper is present.
 */
function parseFeedUnwrapCdata($value) {
    if (preg_match('/<!\[CDATA\[(.*)\]\]>/', $value, $cdata) > 0) {
        return $cdata[1];
    }
    return $value;
}

/**
 * Rewrite an RFC 822 date ("Thu, 19 Nov 2009 15:58:40 +0000") as
 * "2009 11 19 15:58:40". Month and day are zero-padded so the result orders
 * correctly when compared as text. Values that do not have the expected
 * parts are returned untouched.
 */
function parseFeedNormalizeDate($pubDate) {
    $parts = preg_split('/\s+/', trim($pubDate));
    # The leading weekday ("Thu,") is optional; drop it when present
    if (substr($parts[0], -1) === ',') {
        array_shift($parts);
    }
    if (count($parts) < 4) {
        return $pubDate;
    }
    # $parts is now: day, month abbreviation, year, time[, zone]
    return sprintf('%s %02d %02d %s', $parts[2], getMonth($parts[1]), $parts[0], $parts[3]);
}

/**
 * Scan a feed line by line and store every <item> that is not yet recorded
 * in the `posts` table.
 *
 * The scan is line-based: a field is only picked up when its opening and
 * closing tag are on the same line. A post counts as already stored when a
 * row with the same title, site and author exists. When the `output` query
 * parameter is set, one summary line is echoed per item found. Any failing
 * query ends the script through die().
 *
 * @param string $siteTitle     Site name, stored in the `Site` column.
 * @param array  $feed_contents Feed body, one array element per line.
 * @return void
 */
function parseFeed($siteTitle, $feed_contents) {
    # Start with empty fields so a stray </item> never reads undefined variables
    $title = ""; $link = ""; $date = ""; $author = ""; $desc = "";
    $content = ""; $category = "";

    foreach ($feed_contents as $line) {
        if (preg_match('/<item>/', $line) > 0) {
            # New item: clear whatever the previous item (or the channel header) left behind
            $title = ""; $link = ""; $date = ""; $author = ""; $desc = "";
            $content = ""; $category = "";
        }
        if (preg_match('/<title>(.*)<\/title>/', $line, $match) > 0) {
            $title = $match[1];
        }
        if (preg_match('/<link>(.*)<\/link>/', $line, $match) > 0) {
            $link = $match[1];
        }
        if (preg_match('/<pubDate>(.*)<\/pubDate>/', $line, $match) > 0) {
            $date = parseFeedNormalizeDate($match[1]);
        }
        if (preg_match('/<dc:creator>(.*)<\/dc:creator>/', $line, $match) > 0) {
            $author = $match[1];
        }
        if (preg_match('/<category>(.*)<\/category>/', $line, $match) > 0) {
            $category = parseFeedUnwrapCdata($match[1]);
        }
        if (preg_match('/<description>(.*)<\/description>/', $line, $match) > 0) {
            $desc = parseFeedUnwrapCdata($match[1]);
        }
        # Accept both <content ...> and <content:encoded>. [^>]* keeps the
        # attribute part from swallowing a body that itself contains '>'.
        if (preg_match('/<content(?::encoded)?[^>]*>(.*)<\/content(?::encoded)?>/', $line, $match) > 0) {
            $content = parseFeedUnwrapCdata($match[1]);
        }

        # Put in db
        if (preg_match('/<\/item>/', $line) > 0) {
            if (isset($_GET['output'])) {
                # Feed text is untrusted: neutralise markup before echoing it.
                # Existing entities are left alone so they are not double-encoded.
                list($outTitle, $outAuthor, $outSite, $outDate, $outLink) = str_replace(
                    array('<', '>', '"'),
                    array('&lt;', '&gt;', '&quot;'),
                    array($title, $author, $siteTitle, $date, $link)
                );
                echo "Found post \"$outTitle\" by $outAuthor, posted at $outSite on $outDate. ($outLink)<br />";
            }

            # First check if we already have a record for this post
            $query = sprintf(
                "SELECT `ID` FROM `posts` WHERE `Title` = '%s' AND `Site` = '%s' AND `Author` = '%s' LIMIT 1",
                mysql_real_escape_string($title),
                mysql_real_escape_string($siteTitle),
                mysql_real_escape_string($author)
            );
            $result = mysql_query($query)
                or die('Couldn\'t connect to mySQL right now. Try back later.');

            if (mysql_num_rows($result) == 0) {
                # No record yet: insert one. Values are passed as escaped sprintf
                # arguments, so quotes or '%' in feed text cannot break the statement.
                # ID is sent as SQL NULL (presumably an AUTO_INCREMENT column - confirm
                # against the schema) instead of the string 'NULL'.
                $query = sprintf(
                    "INSERT INTO `posts` (`ID`, `Title`, `Site`, `Link`, `Date`, `Author`, `Category`, "
                    . "`Description`, `Content`) VALUES (NULL, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')",
                    mysql_real_escape_string($title),
                    mysql_real_escape_string($siteTitle),
                    mysql_real_escape_string($link),
                    mysql_real_escape_string($date),
                    mysql_real_escape_string($author),
                    mysql_real_escape_string($category),
                    mysql_real_escape_string($desc),
                    mysql_real_escape_string($content)
                );
                $result = mysql_query($query)
                    or die('Couldn\'t connect to mySQL right now. Try back later.');
            }
        }
    }
}
?>
Front page:
Code: Select all
<?php
#-------------------#----------------------#
# htd0rg blogs # by drusepth #
#-------------------#----------------------#
# front page # 12/21/09 #
#-------------------#----------------------#
?>
<?php
# Credentials and host of the MySQL server the front page reads from
$mysql_user     = 'user';
$mysql_password = 'pass';
$mysql_location = 'localhost';
$mysql_db       = 'db';

# Connect first; a falsy link means the server could not be reached
$mysql = mysql_connect($mysql_location, $mysql_user, $mysql_password);
if (!$mysql) {
    die("Couldn't connect to mySQL right now. Try back later.");
}

# Then pick the database, reporting failure with the same generic message
if (!mysql_select_db($mysql_db)) {
    die("Couldn't connect to mySQL right now. Try back later.");
}
?>
<?php
# Header: static, centered page banner (site title and tagline) emitted
# as plain HTML below, before the post listing
?>
<div style="text-align:center" id="header">
<h1>htd0rg blogs</h1>
<h5>Posts from blogs by htd0rg members</h5>
</div>
<?php
# Content
# Build the listing query. The optional ?author= / ?site= filters are escaped
# before they are placed in the SQL, and only plain string values are accepted.
# Posts are ordered newest first so LIMIT keeps the 30 most recent ones.
if (isset($_GET['author']) && is_string($_GET['author'])) {
    $query = sprintf(
        "SELECT * FROM `posts` WHERE `Author` = '%s' ORDER BY `Date` DESC LIMIT 30",
        mysql_real_escape_string($_GET['author'])
    );
} elseif (isset($_GET['site']) && is_string($_GET['site'])) {
    $query = sprintf(
        "SELECT * FROM `posts` WHERE `Site` = '%s' ORDER BY `Date` DESC LIMIT 30",
        mysql_real_escape_string($_GET['site'])
    );
} else {
    $query = "SELECT * FROM `posts` ORDER BY `Date` DESC LIMIT 30";
}
$result = mysql_query($query)
    or die('Couldn\'t connect to mySQL right now. Try back later.');

while ($row = mysql_fetch_object($result)) {
    # Title
    # Stored values may carry HTML entities: decode them, then neutralise markup
    $title = escapeHTML(html_entity_decode($row->Title));
    $link = escapeHTML(html_entity_decode($row->Link));
    echo '<h5 style="padding: 0px; margin:0px; margin-left: 5px;"><a href="' . $link . '">' . $title . '</a></h5>';

    # Author & Meta
    $author = escapeHTML(html_entity_decode($row->Author));
    $date = escapeHTML(html_entity_decode($row->Date));
    $site = escapeHTML(html_entity_decode($row->Site));
    # Filter links carry the value exactly as stored (URL-encoded), so the
    # WHERE clause above compares against the same text that is in the table
    echo '<h6 style="padding: 0px; margin:0px; margin-left: 5px;">';
    echo 'Posted by <a href="?author=' . urlencode($row->Author) . '">' . $author . '</a>';
    echo ' at <a href="?site=' . urlencode($row->Site) . '">' . $site . '</a>';
    echo ' on <em>' . $date . '</em></h6>';

    # Teaser
    $teaser = escapeHTML(html_entity_decode($row->Description));
    echo '<p style="font-size:10px; margin-left: 15px;">' . $teaser . '</p>';
    echo '<hr />';
}
?>
<?php
/**
 * Neutralise HTML markup in a string without touching ampersands.
 *
 * htmlentities() and htmlspecialchars() are not used because they also turn
 * "&" into "&amp;". Only the angle brackets and the double quote are replaced,
 * which makes the result usable both as element text and inside a
 * double-quoted attribute value.
 *
 * @param string $string Text to escape.
 * @return string The text with <, > and " replaced by &lt;, &gt; and &quot;.
 */
function escapeHTML($string) {
    return str_replace(
        array('<', '>', '"'),
        array('&lt;', '&gt;', '&quot;'),
        $string
    );
}
?>