# ShowDict.py
""" Compiles a dictionary of word frequencies for the Shakespeare
sonnet collection.
"""


def GetSonnets(filename='TheSonnets.txt'):
    """ Returns a list of strings L with the property that L[k] is the
    kth line in the file named by filename.

    Each line is stored without its trailing "\\n" (new line character)
    and without a trailing "\\r" (carriage return character).

    PreC: filename is a string that names a readable text file. It
    defaults to 'TheSonnets.txt'.
    """
    L = []
    with open(filename, "r") as F:
        for s in F:
            # Drop the line terminator only; any other trailing
            # whitespace is part of the line and is kept.
            L.append(s.rstrip("\n").rstrip("\r"))
    return L


def dePunc(s):
    """ Returns a string that is the same as s except that all
    occurrences of the period, comma, semicolon, colon, question mark,
    and exclamation point are replaced by the null string. One leading
    and one trailing apostrophe are also removed if present.

    Example: dePunc('a.b,c;d:e?f!') returns 'abcdef'
    Example: dePunc('...') returns ''

    PreC: s is a string.
    """
    for c in '.,;:?!':
        s = s.replace(c, '')
    # startswith/endswith are safe on the empty string, so a string that
    # is empty (or becomes empty, e.g. '...' or "'") no longer triggers
    # an IndexError the way indexing s[0] / s[len(s)-1] would.
    if s.startswith('\''):
        s = s[1:]
    if s.endswith('\''):
        s = s[:-1]
    return s


def WordsInLine(s):
    """ Returns a list of strings, each of which is a word in t, where
    t is s lower-cased and passed through dePunc. A word is delimited
    by whitespace.

    Note that dePunc is applied to the line as a whole, so apostrophes
    are trimmed only at the very start and very end of the line;
    apostrophes elsewhere (e.g. in "summer's") are kept.

    PreC: s is a string
    """
    return dePunc(s.lower()).split()


def UpdateFreqD(D, s):
    """ If s is a key in D, then its value is incremented. Otherwise,
    s is added into D as a key and its value is set to 1.

    D is modified in place; nothing is returned.

    PreC: D is a dictionary with keys that are strings and values that
    are ints. s is a string.
    """
    D[s] = D.get(s, 0) + 1


def MakeFreqD(L):
    """ Returns a dictionary whose keys are the words that occur in L
    and whose values are their frequencies.

    Words are obtained from each string in L using WordsInLine.

    PreC: L is a list of strings
    """
    D = dict()
    for s in L:
        for w in WordsInLine(s):
            UpdateFreqD(D, w)
    return D


def printFreq(D, ListOfWords):
    """ Prints each string in ListOfWords and its frequency in D.
    A string that is not a key in D is printed with frequency 0.

    PreC: D is a dictionary with keys that are strings and values that
    are ints. ListOfWords is a list of strings.
    """
    for w in ListOfWords:
        f = D.get(w, 0)
        # Parenthesised single-argument form prints the same text under
        # both Python 2 and Python 3.
        print('%10s %3d' % (w, f))


if __name__ == '__main__':
    # Read in the sonnets, line-by-line.
    L = GetSonnets()
    # Make the frequency dictionary
    D = MakeFreqD(L)
    # Let's display the frequencies of these words...
    MyWordList = ['love', 'sun', 'moon', 'sad', 'happy', 'thou', 'me',
                  'flowers', 'water', 'dude', 'rain', 'clouds', 'wonder',
                  'forever']
    printFreq(D, MyWordList)