# Copyright (c) 2007 Michael Porter # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. """ Utility to copy blog posts from Wordpress to Blogger. Typical usage: 1. Use the Wordpress export feature to export your blog as an XML file 2. Run: python wptoblogger.py -u -b -a (add -d switch to delete *all* your blogger posts first) Note that (at the time of writing) Blogger has a limit on the number of posts per day so you may have problems if you have a lot to transfer. Requires the following Python libraries: BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/ Google's GData Python Client - http://code.google.com/p/gdata-python-client/ """ import logging logger = logging.getLogger("wptoblogger") ## WORDPRESS UTILS ############################################# def get_posts(wp_xml_file): "Extract posts (with comments) from a WordPress XML export file" doc = wp_xml_file.read() # Ensure UTF8 chars get through correctly by ensuring we have a # compliant UTF8 input doc doc = doc.decode('utf-8', 'replace').encode('utf-8') import BeautifulSoup feed = BeautifulSoup.BeautifulSoup(doc) for entry in feed('item'): # Only include published posts if (entry.find('wp:post_type').string == 'post') and (entry.find('wp:status').string == 'publish'): comments = [] for comment in entry('wp:comment'): approved = comment.find('wp:comment_approved') # Only include approved comments if approved and (approved.string == '1'): comments.append(dict( content = comment.find('wp:comment_content').string, author = comment.find('wp:comment_author').string, author_url = comment.find('wp:comment_author_url').string, published = _wp_date_to_time(comment.find('wp:comment_date_gmt').string) )) yield dict( id = entry.find('wp:post_id').string, title = entry.find('title').string, content = entry.find('content:encoded').string, author = entry.find('dc:creator').string, published = _wp_date_to_time(entry.find('wp:post_date_gmt').string), categories = [c.string for c in entry('category')], comments = comments ) def _wp_date_to_time(wp_date): import time return time.strptime(wp_date, '%Y-%m-%d %H:%M:%S') ## BLOGGER UTILS ############################################### def get_service(user, pw): from gdata import service svc = service.GDataService(user, pw) svc.source="codesimple-wptoblogger-1.0" svc.service="blogger" svc.server="www.blogger.com" svc.ProgrammaticLogin() return svc def get_authsub_url(next_url): from gdata import service scope = 'http://www.blogger.com/feeds' secure = False session = True blogger_service = service.GDataService() return blogger_service.GenerateAuthSubURL(next_url, scope, secure, session) def get_service_from_token(authsub_token): from gdata import service svc = service.GDataService() svc.auth_token = authsub_token svc.UpgradeToSessionToken() return svc def comment_post_url_from_post(post): link = [l for l in post.link if l.rel=="replies" and l.type=="application/atom+xml"][0] return link.href def to_blog_time(time_tuple): import time return time.strftime('%Y-%m-%dT%H:%M:%SZ', time_tuple) def call_post(svc, entry, url): import gdata import time tries = 5 while True: try: tries -= 1 result = svc.Post(entry, url) break except gdata.service.RequestError, e: logger.error("Failed to post to %s, will retry %d times. Error was %s" % (url, tries, e)) if tries < 1: raise time.sleep(1) return result def blogger_post(svc, blog_id, author, title, content, published_date, categories): import gdata, functools from gdata import atom entry = gdata.GDataEntry() entry.author.append(atom.Author(atom.Name(text=author))) entry.title = atom.Title('xhtml', title) entry.content = atom.Content('html', '', content) entry.published = atom.Published(to_blog_time(published_date)) entry.category.extend([atom.Category(c, 'http://www.blogger.com/atom/ns#') for c in categories]) # Note that this appears to return a valid response even if your post has not been # uploaded due to Blogger limits on posts-per-day post = call_post(svc, entry, '/feeds/%s/posts/default' % blog_id) return dict( post = post, comment_func = functools.partial(comment, svc, comment_post_url_from_post(post)) ) def comment(svc, url, author, author_url, content, published_date, add_author = False): import gdata from gdata import atom entry = gdata.GDataEntry() entry.author.append(atom.Author(atom.Name(text=author))) # according to forum and practice this is ignored :( if add_author: author_html = author if author_url: author_html = '' % author_url + author_html + '' content = 'Comment from %s:\r\n\r\n' % author_html + content entry.content = atom.Content('html', '', content) entry.published = atom.Published(to_blog_time(published_date)) return call_post(svc, entry, url) def blogger_get_posts(svc, blog_id): feed = svc.GetFeed('/feeds/%s/posts/default' % blog_id) return feed.entry def delete_posts(svc, posts): for post in posts: svc.Delete(post.GetEditLink().href) def clear_blog(svc, blog_id): delete_posts(svc, blogger_get_posts(svc, blog_id)) ## MAIN ######################################################## def convert(wp_xml_file, blogger_service, blog_id, post_author): posts = get_posts(wp_xml_file) for post in posts: logger.info("Processing post %s" % post['title']) posted = blogger_post(blogger_service, blog_id, post_author, post['title'], post['content'], post['published'], post['categories']) add_comment = posted['comment_func'] for comment in post['comments']: author = comment['author'] add_comment(author, comment['author_url'], comment['content'], comment['published'], add_author = (author != post_author)) def run(wp_xml_file, blogger_user, blogger_password, authsub_token, blog_id, post_author, delete): if authsub_token: blogger_service = get_service_from_token(authsub_token) else: blogger_service = get_service(blogger_user, blogger_password) if delete: logger.info("Removing existing posts from blog %s" % blog_id) clear_blog(blogger_service, blog_id) convert(wp_xml_file, blogger_service, blog_id, post_author) def main(): from optparse import OptionParser parser = OptionParser(usage='usage: %prog [OPTION...] FILE', description='Extract posts from WordPress XML export file FILE and post to specified blogger blog.') parser.set_defaults(delete=False) parser.add_option("-u", "--user", dest="blogger_user", help="Blogger username", metavar="USERNAME") parser.add_option("-p", "--password", dest="blogger_password", help="Blogger password (omit option to get prompted)", metavar="PASSWORD") parser.add_option("-t", "--token", dest="authsub_token", help="Blogger AuthSub token (instead of username/password)", metavar="TOKEN") parser.add_option("-b", "--blog", dest="blog_id", help="Blogger blog ID", metavar="ID") parser.add_option("-a", "--author", dest="post_author", help="Author for Blogger posts", metavar="NAME") parser.add_option("-d", "--delete", action="store_true", dest="delete", help="Delete all entries from existing blog first") (options, args) = parser.parse_args() if not options.authsub_token and not options.blogger_password: import getpass options.blogger_password = getpass.getpass('Blogger password: ') if not (((options.blogger_user and options.blogger_password) or options.authsub_token) and options.blog_id and options.post_author): parser.error("Must specify blogger user/password or token and blog ID and post author.") if len(args) >= 1: file = open(args[0], 'r') else: import sys file = sys.stdin import logging logging.basicConfig(level=logging.INFO) run(wp_xml_file=file, **options.__dict__) if __name__ == "__main__": main()