*/

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// Base URL of the blog; used below to turn relative image paths into
// absolute ones under "$baseUrl/data/files/".
$baseUrl = "http://unflyingobject.com/blog";

// Directory that holds the exported .html pages to import.
$rwSitePath = "/Users/filipp/Sites/unflyingobject.com/files";

// Category name as found in the exported page => category_id used in the
// post_category table.
$categories = array (
    "Random"        => 1,
    "Code"          => 2,
    "Software"      => 3,
    "OS X Server"   => 4,
    "Mac OS X"      => 5,
    "Hardware"      => 6,
    "Debian"        => 7,
    "OnTrees"       => 8,
    "Media"         => 9,
    "Uncategorised" => 10);

/// That's all you should have to edit ///

include "../lib/boeuf.php";
init ();

// Has no effect at file scope unless this script is itself included from
// inside a function; kept so that case keeps working.
global $c;

// ---------------------------------------------------------------------------
// Import loop: one row in `post` (plus one in `post_category`) per page.
// ---------------------------------------------------------------------------

$rwSiteDir = opendir ($rwSitePath);

// Without this guard a bad path would hand a non-resource to readdir ().
if ($rwSiteDir === false) {
    die ("Cannot open directory: $rwSitePath\n");
}

$i = 0;

while (false !== ($file = readdir ($rwSiteDir))) {
    // Only .html files are posts; names containing "archive" or
    // "category-<n>.html" are skipped.
    if (!preg_match ('/\.html$/', $file) || preg_match ('/archive|category-\d+\.html/', $file)) {
        continue;
    }

    $fp = $rwSitePath . "/$file";
    echo "* Importing: $file\n";

    $html = file_get_contents ($fp);

    if ($html === false) {
        echo "  ! Skipped: cannot read $fp\n";
        continue;
    }

    // --- Title -------------------------------------------------------------
    // NOTE(review): this pattern (like the category and body patterns below)
    // has no opening HTML tag and is anchored only by a line break. It looks
    // incomplete - confirm it against the exported markup. It is kept
    // byte-for-byte, including the line break inside the string literal.
    if (!preg_match ('/
(.*?)<\/div>/s', $html, $match)) {
        echo "  ! Skipped: no title found in $file\n";
        continue;
    }

    $title = sanitize (trim ($match[1]));

    // --- Date --------------------------------------------------------------
    // Expected form is "dd.mm.yy hh:mm".
    if (!preg_match ('/(\d{2})\.(\d{2})\.(\d{2}) (\d{2}):(\d{2})/', $html, $match)) {
        echo "  ! Skipped: no date found in $file\n";
        continue;
    }

    list ($d, $mon, $y, $h, $min) = array_slice ($match, 1);

    // The two-digit year is handed to mktime () as-is; century resolution is
    // left to mktime ().
    $date = date ("Y-m-d H:i:s.0", mktime ((int) $h, (int) $min, 0, (int) $mon, (int) $d, (int) $y));

    // --- Category ----------------------------------------------------------
    // A missing or unknown category falls back to id 10, exactly as before,
    // but without reading an undefined array index.
    $catName = preg_match ('/(.*?)<\/a>/', $html, $match) ? trim ($match[1]) : '';
    $catId = empty ($categories[$catName]) ? 10 : $categories[$catName];

    // --- Body --------------------------------------------------------------
    if (!preg_match ('/
(.*?)
/s', $html, $match)) {
        echo "  ! Skipped: no body found in $file\n";
        continue;
    }

    // Point embedded images at their absolute location under $baseUrl.
    // NOTE(review): the "." before \w{3} is unescaped and so matches any
    // character; it was presumably meant to be "\." - confirm before changing.
    $body = preg_replace ('/src="(page\d+_blog_entry\d+_\d?.\w{3})"/', "src=\"$baseUrl/data/files/$1\"", $match[1]);

    // Convert the HTML fragment to text with the external html2text.py
    // script, via a scratch file in the current working directory.
    if (file_put_contents ("tmp.html", $body) === false) {
        echo "  ! Skipped: cannot write tmp.html for $file\n";
        continue;
    }

    // exec () appends to an existing array, so start from an empty one.
    $output = array ();
    $status = 0;
    exec ("python html2text.py tmp.html", $output, $status);

    if ($status !== 0) {
        echo "  ! Skipped: html2text.py exited with status $status for $file\n";
        continue;
    }

    // Separator first: the reversed argument order is no longer accepted by
    // current PHP versions.
    $body = mysql_real_escape_string (implode ("\n", $output));

    // --- Store -------------------------------------------------------------
    $guid = md5 (microtime ());

    // NOTE(review): $title is interpolated without quotes, so this statement
    // relies on sanitize () returning a quoted, escaped SQL literal - confirm
    // in ../lib/boeuf.php.
    $sql = "INSERT INTO post (title, guid, date, body) VALUES ($title, '$guid', '$date', '$body')";
    mysql_query ($sql) or die (mysql_error ());
    $postId = mysql_insert_id ();

    $sql = "INSERT INTO post_category (category_id, post_id) VALUES ($catId, $postId)";
    query ($sql);

    $i++;
}

closedir ($rwSiteDir);

echo "$i pages imported.\n";
?>