Skip to content
Snippets Groups Projects
Select Git revision
  • a38f5e974211443aaf1b8c577caad388747d61bb
  • master default protected
  • v0.11
  • v0.10
  • v0.9
  • v0.8
  • v0.7
7 results

basic_search_index.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    basic_search_index.py 1.65 KiB
    import os
    import shutil
    
    from whoosh.fields import TEXT, Schema, ID, NUMERIC
    from whoosh.index import create_in, open_dir, EmptyIndexError
    from whoosh.qparser import QueryParser, OrGroup
    
    
    class BasicSearchIndex:
        """Expose relevant functions of Whoosh using a simple interface.

        Every document carries four stored fields: ``file_name`` (ID),
        ``page`` (NUMERIC), ``content`` (TEXT) and ``title`` (TEXT).
        Queries are parsed against the ``content`` field.
        """

        def __init__(self, index_dir="index"):
            """Open the index stored in *index_dir*, creating it if it is empty."""
            self.schema = Schema(file_name=ID(stored=True), page=NUMERIC(stored=True), content=TEXT(stored=True),
                                 title=TEXT(stored=True))
            try:
                self.ix = open_dir(index_dir)
            except EmptyIndexError:
                self.create(index_dir)

        def create(self, index_dir):
            """Create a new index in *index_dir* using ``self.schema``.

            The directory is created first when it is missing, since
            ``create_in`` writes into an already existing directory.
            """
            if not os.path.isdir(index_dir):
                os.makedirs(index_dir)
            self.ix = create_in(index_dir, self.schema)

        def add(self, file_name, page, content, title=None):
            """Add one document to the index and commit it immediately.

            ``title`` is optional; when it is ``None`` the document is added
            without a title value.
            """
            fields = {"file_name": file_name, "page": page, "content": content}
            if title is not None:
                fields["title"] = title
            # Used as a context manager, the writer commits on success and
            # cancels on error, so a failing add_document does not leave an
            # open writer behind.
            with self.ix.writer() as writer:
                writer.add_document(**fields)

        def result_list(self, query, context=None):
            """Return the Whoosh results for the query string *query*.

            The query is parsed for the ``content`` field with an OR group
            built by ``OrGroup.factory(0.9)``. ``context`` is accepted for
            interface compatibility but is not used.
            """
            query_parser = QueryParser("content", self.ix.schema, group=OrGroup.factory(0.9))
            # NOTE(review): the searcher opened here is never closed; the
            # returned results presumably still read from it, so closing it
            # is left to the caller. Every call opens a new searcher.
            return self.ix.searcher().search(query_parser.parse(query))

        def search(self, query, context=None):
            """Return the first hit of ``result_list(query, context)``.

            Raises:
                IndexError: when the result list has no hit at index 0.
            """
            return self.result_list(query, context)[0]
    
    
    if __name__ == "__main__":
        # Manual smoke test: build a throw-away index, add three documents,
        # print the first hit for a query, then remove the index again.
        test_dir = "index_test"
        # Start from a clean directory so leftovers of an aborted earlier run
        # neither break makedirs nor end up in the search results.
        shutil.rmtree(test_dir, ignore_errors=True)
        os.makedirs(test_dir)
        try:
            index = BasicSearchIndex(test_dir)
            index.add(file_name=u"/world", page=1, content=u"this is a test about the world", title=u"world")
            index.add(file_name=u"/fire", page=2, content=u"i could not imagine the heat or the regression",
                      title=u"fire")
            index.add(file_name=u"/dream", page=3, content=u"dreaming is special not only to humans but all animals",
                      title=u"dream")
            # search() takes a context argument that is currently unused.
            print(index.search("logistic regression", None))
        finally:
            # Remove the test index even when one of the steps above fails.
            shutil.rmtree(test_dir, ignore_errors=True)