#!/usr/bin/env ruby
#
# Generates a one-entry Atom feed for the Perry Bible Fellowship comic.
#
# The script downloads the comic archive list, compares it with the copy
# cached by the previous run and, when the list has changed, rebuilds
# "#{DIR_PREFIX}/pbf.xml" from the first link on the archive page. The cache
# is refreshed only after the feed has been written, so a failed run is
# retried the next time the script executes.

require 'cgi'
require 'open-uri'
require 'rubygems'
require 'html/tree'
require 'html/xpath'
require 'feed_tools'

DIR_PREFIX = '/home/joelmichael/stuff.ediblepet.net/feeds'

archive_dir = 'http://70.86.201.113/imageserv2/temporary'
cache_path = "#{DIR_PREFIX}/cache/pbf.html"

remote_html = URI.parse("#{archive_dir}/archivelist.html").read

# A missing cache file (e.g. on the very first run) counts as "changed"
# instead of crashing the script.
cached_html = File.exist?(cache_path) ? File.read(cache_path) : nil
exit if remote_html == cached_html

parser = HTMLTree::Parser.new
parser.feed remote_html
anchors = parser.tree.rexml_match('//html/body/font/center/a')

# No dates are provided, so sadly this is a one-entry feed
# unless I decide to make a caching system
anchor = anchors.shift
abort 'pbf: no comic links found in the archive list' unless anchor

feed = FeedTools::Feed.new
feed.id = 'http://stuff.ediblepet.net/feeds/pbf.xml'
feed.link = 'http://cheston.com/pbf/archive.html'
feed.title = 'Perry Bible Fellowship'
feed.feed_type = 'atom'
feed.author = 'Nicholas Gurewitch'

item = FeedTools::FeedItem.new
item.author = 'Nicholas Gurewitch'
item.id = "#{archive_dir}/#{anchor.attribute('href').value}"
item.link = item.id
item.title = anchor.text

# The archive links point at an HTML page wrapping the comic; the first <img>
# on that page is used as the entry content.
image_page_html = URI.parse(item.link).read
image = REXML::XPath.first(REXML::Document.new(image_page_html), '//img')
abort "pbf: no image found on #{item.link}" unless image

# Both values come from remote HTML, so escape them before embedding them in
# attribute values.
image_src = CGI.escapeHTML(image.attribute('src').value)
image_alt = CGI.escapeHTML(item.title.to_s)
item.content = "<img src=\"#{image_src}\" alt=\"#{image_alt}\"/>"

feed << item

# Build the XML before opening the output file so a build failure cannot
# leave a truncated feed behind; the block form closes the file even when the
# write raises.
feed_xml = feed.build_xml
File.open("#{DIR_PREFIX}/pbf.xml", 'w') { |out| out.write(feed_xml) }

# Record the new archive list last: if anything above failed, the stale cache
# makes the next run try again.
File.open(cache_path, 'w') { |cache| cache.write(remote_html) }