""" A script to get a feed full of links (like Delicious bookmarks) and post batches of them to a single entry on LJ. It accumulates links until there are MAX_PENDING of them or until the last LJ entry it made was MAX_TIME ago. The information which it needs between runs is written to PICKLE_FILE. """ # $Id: lj-minifeed.py,v 1.5 2009/10/07 22:41:58 paul Exp $ import xml.dom.minidom import md5 import sys import pickle import feedparser import codecs import xmlrpclib import pickle import datetime import time PICKLE_FILE = "minifeed.data" MAX_PENDING = 10 # post if we have this many items pending MAX_TIME = 60 * 60 * 24 * 4 # or if we have something pending and it's been this long POST_HEADING = "" # HTML at the begining of the body of the post. You'll want to change this # Cheapo object persistence via pickle. This is the the object we # save and restore. The defaults are only used if we can't find the # pickle file. class State: def __init__(self): self.seen_guids = [] self.feed_url = "http://feeds.delicious.com/v2/rss/pw201?count=15" self.etag = None self.modified = None self.pending = [] self.last_time = 0 self.tag_dict = {} def update_lj_args(lj, args, username, password): challenge = lj.LJ.XMLRPC.getchallenge() std = { 'version': 1, # so LJ assumes we're using UTF-8. 'auth_method': 'challenge', 'auth_challenge': challenge['challenge'], 'auth_response': md5.new(challenge['challenge'] + md5.new(password).hexdigest()).hexdigest(), 'username': username, } args.update(std) def post_to_lj(body, title, tags, username, password): # Faff to comply with LJ's bots policy class CustomTransport(xmlrpclib.Transport): user_agent = "http://www.noctua.org.uk/paul/software/lj-minifeed.html; pw201 AT livejournal.com, running on behalf of lj user " + username lj = xmlrpclib.ServerProxy("http://www.livejournal.com/interface/xmlrpc", CustomTransport()) args = {} update_lj_args(lj, args, username, password) # I don't want to create new tags on LJ for every tag I use on delicious # but I do want to use the LJ tags if they already existed. So we check the # delicious tags against the LJ ones. I use a "-" for a space on delicious, # spaces are allowed in tags on LJ. lj_tags = [t['name'] for t in lj.LJ.XMLRPC.getusertags(args)['tags']] munged_tags = [t.replace('-', ' ') for t in tags] post_tags = "link blog," + ",".join([t for t in munged_tags if t in lj_tags]) now = datetime.datetime.now() args = { "event" : body, "subject" : title, "year" : now.year, "mon" : now.month, "day" : now.day, "hour": now.hour, "min" : now.minute, "props": {"taglist": post_tags} } update_lj_args(lj, args, username, password) return lj.LJ.XMLRPC.postevent(args) def get_html(item): """ Generate HTML summary for an entry in the feed """ html = u'
%s' % (item.link, item.title) if item.has_key("description"): html += '
%s' % item.description if item.has_key("categories"): html += '
(tags: %s)' % " ".join(['%s' % (x[0], x[1], x[1]) for x in item.categories]) return html if __name__ == "__main__": try: p = open(PICKLE_FILE) state = pickle.load(p) p.close() except: state = State() # I keep my username and password in an xml file which configures my backup # tool. If you don't, you'll need to alter this. config = xml.dom.minidom.parse("ljdump.config.pw201") username = config.documentElement.getElementsByTagName("username")[0].childNodes[0].data password = config.documentElement.getElementsByTagName("password")[0].childNodes[0].data f = feedparser.parse(state.feed_url, etag=state.etag, modified=state.modified) # Be nice to the other side by remembering when we last polled them if f.has_key('etag'): state.etag = f.etag if f.has_key('modified'): state.modified = f.modified if f.status == 301: # Moved state.feed_url = f.href new_entries = [x for x in f.entries if x.id not in state.seen_guids] state.pending.extend([get_html(x) for x in new_entries]) for e in new_entries: state.seen_guids.insert(0, e.id) if e.has_key("categories"): for c in e.categories: state.tag_dict[c[1]] = state.tag_dict.setdefault(c[1], 0) + 1 state.seen_guids = state.seen_guids[:100] # Don't let this grow without limit # Is it time to post an entry to LJ? if state.pending and (len(state.pending) > MAX_PENDING or time.time() - state.last_time > MAX_TIME): body = POST_HEADING + u"
" + "".join(state.pending) + "
" # Put the most used tags in the subject of the posting popular_tags = state.tag_dict.keys() popular_tags.sort(lambda x, y: -cmp(state.tag_dict[x], state.tag_dict[y])) print post_to_lj(body, "Link blog: " + ", ".join(popular_tags[:4]), state.tag_dict.keys(), username, password) state.last_time = time.time() state.pending = [] state.tag_dict = {} # Remember everything p = open(PICKLE_FILE, "w") pickle.dump(state, p) p.close()