"""Estimate bigram transition probabilities for a short sample text.

The text is split into clauses on commas and periods, each clause is turned
into adjacent word pairs (bigrams), and for every bigram ``(pre, post)`` the
relative frequency ``count(pre, post) / count(pre as first word)`` is
computed.  The five highest-ranked bigrams are printed.
"""
import re
from collections import Counter

try:
    # Python 2: keep using the lazy iterator flavour of map.
    from itertools import imap as map
except ImportError:
    # Python 3: itertools.imap is gone; the built-in map is already lazy.
    pass

# Compiled once; raw string so that ``\w`` is not treated as a str escape.
_WORD_RE = re.compile(r'\w+')


def parserwords(sentence):
    """Return the adjacent word pairs (bigrams) found in *sentence*.

    The sentence is lowercased and tokenised with the ``\\w+`` pattern, so
    punctuation and whitespace never appear in a token.

    Args:
        sentence: Text to tokenise.

    Returns:
        A list of ``(previous_word, word)`` tuples in order of appearance.
        The list is empty when the sentence holds fewer than two words.
    """
    tokens = _WORD_RE.findall(sentence.lower())
    # Pair every token with its successor; zip stops at the shorter input,
    # which drops the dangling last token automatically.
    return list(zip(tokens, tokens[1:]))


context = """Do you hear the people sing, singing a song of angry men. It is the music of a people, who will not be slaves again, when the beating of your heart echoes the beating of the drums. There is a life about to start when tomorrow comes."""

# Bigrams are collected per clause so that no pair spans a comma or period.
# parserwords() lowercases its input, so no extra normalisation is needed.
words = []
for sentence in map(parserwords, re.split(r'[,.]', context)):
    words.extend(sentence)

# How often each word occurs as the first element of a bigram.
prefixcounter = Counter(pre for pre, _ in words)
# How often each complete bigram occurs.
counter = Counter(words)

# Relative frequency of `post` among all bigrams that start with `pre`.
meter = {
    pair: float(count) / prefixcounter[pair[0]]
    for pair, count in counter.items()
}

# Highest probability first; ties are broken by the bigram in ascending
# order.  This key is equivalent to the former cmp-based comparator.
result = sorted(meter.items(), key=lambda item: (-item[1], item[0]))

print(result[:5])