class Document:
    """A document record holding a title, an author and its text context.

    The class name and the three constructor parameters follow the way the
    class is instantiated elsewhere in this file:
    ``Document(title, author, context)``.

    Attributes:
        title: Title given at construction time.
        author: Author given at construction time.
    """

    # Message template with a single ``{}`` placeholder for the context.
    WELCOME_STR = 'Welcome! The context for this book is {}'

    def __init__(self, title, author, context):
        """Store the given fields and print a construction notice.

        Args:
            title: Title of the document.
            author: Author of the document.
            context: Text content of the document.
        """
        print('init function called')
        self.title = title
        self.author = author
        # The double leading underscore makes Python mangle this name to
        # ``_Document__context``, so it is not reachable as ``obj.__context``
        # from outside the class.
        self.__context = context
# Sample instances. Each assignment must be its own statement: two
# assignments fused on one line are a syntax error.
harry_potter_book = Document(
    'Harry Potter(Book)',
    'J. K. Rowling',
    '... Forever Do not believe any thing is capable of thinking independently ...',
)
harry_potter_movie = Video('Harry Potter(Movie)', 'J. K. Rowling', 120)
# 1.txt I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character. I have a dream today.
# 2.txt I have a dream that one day down in Alabama, with its vicious racists, . . . one day right there in Alabama little black boys and black girls will be able to join hands with little white boys and white girls as sisters and brothers. I have a dream today.
# 3.txt I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight, and the glory of the Lord shall be revealed, and all flesh shall see it together.
# 4.txt This is our hope. . . With this faith we will be able to hew out of the mountain of despair a stone of hope. With this faith we will be able to transform the jangling discords of our nation into a beautiful symphony of brotherhood. With this faith we will be able to work together, to pray together, to struggle together, to go to jail together, to stand up for freedom together, knowing that we will be free one day. . . .
# 5.txt And when this happens, and when we allow freedom ring, when we let it ring from every village and every hamlet, from every state and every city, we will be able to speed up that day when all of God's children, black men and white men, Jews and Gentiles, Protestants and Catholics, will be able to join hands and sing in the words of the old Negro spiritual: "Free at last! Free at last! Thank God Almighty, we are free at last!"
def add_corpus(self, file_path):
    """Read a text file and pass its content to ``self.process_corpus``.

    Args:
        file_path: Path of the file to read. The same value is passed to
            ``process_corpus`` as the document id.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, 'r') as fin:
        text = fin.read()
    # The file is already closed here; processing does not need the handle.
    self.process_corpus(file_path, text)
def process_corpus(self, id, text):
    """Placeholder for indexing one document; always raises.

    Args:
        id: Identifier of the document.
        text: Text of the document.

    Raises:
        NotImplementedError: Always. It is a subclass of ``Exception``, so
            existing ``except Exception`` handlers keep working.
    """
    raise NotImplementedError('process_corpus not implemented.')
def search(self, query):
    """Placeholder for answering a query; always raises.

    Args:
        query: The query to search for.

    Raises:
        NotImplementedError: Always. It is a subclass of ``Exception``, so
            existing ``except Exception`` handlers keep working.
    """
    raise NotImplementedError('search not implemented.')
def main(search_engine):
    """Load the five sample files, then answer queries read from stdin.

    Each line read with ``input()`` is passed to ``search_engine.search``;
    the number of results and then each result are printed. The loop ends
    when standard input is exhausted.

    Args:
        search_engine: Object providing ``add_corpus(file_path)`` and
            ``search(query)``; the latter must return a sized iterable.
    """
    for file_path in ['1.txt', '2.txt', '3.txt', '4.txt', '5.txt']:
        search_engine.add_corpus(file_path)

    while True:
        try:
            query = input()
        except EOFError:
            # End of input (Ctrl-D or a finished pipe): stop instead of
            # letting the exception escape as a traceback.
            return
        results = search_engine.search(query)
        print('found {} result(s):'.format(len(results)))
        for result in results:
            print(result)
def process_corpus(self, id, text):
    """Record document ``id`` under every word of ``text`` in the inverted index.

    ``self.inverted_index`` maps a word to the list of document ids it was
    seen in, in the order the documents were processed.

    Args:
        id: Identifier of the document being indexed.
        text: Text of the document; split into words by
            ``self.parse_text_to_words``.
    """
    # NOTE(review): presumably parse_text_to_words yields each distinct word
    # once; if it yields duplicates, ``id`` is appended once per occurrence.
    words = self.parse_text_to_words(text)
    for word in words:
        # setdefault creates the posting list the first time a word is seen.
        self.inverted_index.setdefault(word, []).append(id)
defsearch(self, query): query_words = list(self.parse_text_to_words(query)) query_words_index = list() for query_word in query_words: query_words_index.append(0) # 如果某一个查询单词的倒序索引为空,我们就立刻返回 for query_word in query_words: if query_word notin self.inverted_index: return [] result = [] whileTrue: # 首先,获得当前状态下所有倒序索引的 index current_ids = [] for idx, query_word in enumerate(query_words): current_index = query_words_index[idx] current_inverted_list = self.inverted_index[query_word] # 已经遍历到了某一个倒序索引的末尾,结束 search if current_index >= len(current_inverted_list): return result
# Recent comments