#coding=utf-8
# Count how many times each word occurs in a plain-English text file and find the most frequent word.
import re
def ConvertAllNoLetterToSpace(inString):
    """Replace every run of non-letter characters with a single space.

    A "letter" is an ASCII character in ``a-z`` or ``A-Z``; digits,
    punctuation and whitespace (including newlines) all count as
    non-letters.

    Args:
        inString: The text to clean.

    Returns:
        A copy of ``inString`` in which each maximal run of non-letter
        characters has been collapsed into one space.
    """
    # A single regex pass is required for correctness: replacing the found
    # runs one by one with str.replace lets a short run (e.g. '-') break up
    # a later, longer run (e.g. ',-'), which then no longer matches and
    # leaves stray punctuation in the result.
    return re.sub(r'[^a-zA-Z]+', ' ', inString)
def ConstructWordsDict(inFile):
    """Build a word -> occurrence-count dictionary from lines of text.

    Each line is passed through ``ConvertAllNoLetterToSpace`` and then
    split on whitespace; every resulting token is counted. Tokens are
    compared exactly as they appear, so counting is case-sensitive.

    Args:
        inFile: An iterable yielding text lines, such as an open file.

    Returns:
        A dict mapping each word to the number of times it was seen.
    """
    counts = {}
    for rawLine in inFile:
        for token in ConvertAllNoLetterToSpace(rawLine).split():
            # A word not seen before starts from zero.
            counts[token] = counts.get(token, 0) + 1
    return counts
def FindMostWordInDict(inDict):
    """Return the word with the highest count in a word-count mapping.

    Args:
        inDict: A dict mapping words to their occurrence counts.

    Returns:
        A ``(word, count)`` tuple for the entry with the largest count.
        When several entries share the largest count, the one reached
        first while iterating ``inDict`` is returned. If ``inDict`` is
        empty, or no count is greater than zero, the result is
        ``('', 0)``.
    """
    best = ('', 0)
    for candidate, count in inDict.items():
        # Strict comparison: a later entry with an equal count never
        # displaces the current leader.
        if count > best[1]:
            best = (candidate, count)
    return best
if __name__ == '__main__':
    # Script entry point: count the words in a hard-coded text file, then
    # print the most frequent word with its count, followed by the whole
    # word-count table.
    inFilePath = r'D:\eclipse\workspace\PyStudy\words.txt'
    # The context manager guarantees the file is closed, even if counting
    # raises part-way through.
    with open(inFilePath) as inFile:
        wordsDict = ConstructWordsDict(inFile)
    retword, retNum = FindMostWordInDict(wordsDict)
    # Formatting into one string and printing a single parenthesised value
    # gives the same "word count" output under the Python 2 print statement
    # and is also valid with the Python 3 print function.
    print('%s %s' % (retword, retNum))
    print(wordsDict)