
Add rerank config

Signed-off-by: shiyu22 <shiyu.chen@zilliz.com>
main
shiyu22 committed 1 year ago
commit 47daa58a2c
2 changed files:
1. README.md (22 lines changed)
2. eqa_search.py (28 lines changed)

README.md

```diff
@@ -26,8 +26,8 @@ config.collection_name = 'chatbot'
 config.top_k = 5
 # If using zilliz cloud
-config.user = [zilliz-cloud-username]
-config.password = [zilliz-cloud-password]
+# config.user = [zilliz-cloud-username]
+# config.password = [zilliz-cloud-password]
 # OpenAI api key
 config.openai_api_key = [your-openai-api-key]
@@ -36,8 +36,8 @@ config.embedding_model = 'all-MiniLM-L6-v2'
 # Embedding model device
 config.embedding_device = -1
-# The threshold to filter milvus search result
-config.threshold = 0.5
+# Rerank the docs searched from knowledge base
+config.rerank = True
 # The llm model source, openai or dolly
 config.llm_src = 'openai'
```
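For context, a hedged sketch of how the new option slots into the config shown above; the `AutoConfig`/`AutoPipes` entry points and the `'eqa-search'` pipeline name are assumptions, not shown in this diff:

```python
# Sketch only: AutoConfig/AutoPipes and the 'eqa-search' name are assumptions.
from towhee import AutoConfig, AutoPipes

config = AutoConfig.load_config('eqa-search')
config.collection_name = 'chatbot'
config.openai_api_key = 'sk-...'  # placeholder
config.rerank = True              # new: rerank docs retrieved from milvus
config.threshold = 0.6            # with rerank=True, this is the rerank cutoff

pipe = AutoPipes.pipeline('eqa-search', config=config)
```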
```diff
@@ -99,11 +99,19 @@ The user name for [Cloud user](https://zilliz.com/cloud), defaults to `None`.
 The user password for [Cloud user](https://zilliz.com/cloud), defaults to `None`.
 
-### Configuration for Similarity Evaluation
+### Configuration for Rerank
 
-***threshold (Union[float, int]):***
+***rerank***: bool
 
-The threshold to filter the milvus search result.
+Whether to rerank the docs retrieved from the knowledge base, defaults to `False`. If set to `True`, the [rerank](https://towhee.io/towhee/rerank) operator is used.
+
+***rerank_model***: str
+
+The name of the rerank model; set it according to the [rerank](https://towhee.io/towhee/rerank) operator.
+
+***threshold***: Union[float, int]
+
+The threshold for rerank, defaults to 0.6. If `rerank` is `False`, it filters the milvus search result; otherwise filtering is done by the [rerank](https://towhee.io/towhee/rerank) operator.
 
 ### Configuration for LLM
```
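To make the `rerank`/`threshold` interaction concrete, here is a hedged sketch of the operator on its own. The `ops.rerank(model, threshold)` signature and the docs-in, docs-plus-scores-out convention are taken from the `eqa_search.py` diff below; invoking the operator standalone via `.get_op()` mirrors how that file loads other operators, but is otherwise an assumption:

```python
# Hedged sketch: signature taken from the pipeline code below; standalone
# invocation via .get_op() is an assumption.
from towhee import ops

rerank = ops.rerank('cross-encoder/ms-marco-MiniLM-L-6-v2', 0.6).get_op()
docs = ['Towhee is a framework for neural data processing.',
        'Completely unrelated text.']
kept_docs, scores = rerank('what is towhee?', docs)
# kept_docs: the docs whose cross-encoder score cleared the 0.6 threshold
```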

eqa_search.py

```diff
@@ -33,8 +33,6 @@ class EnhancedQASearchConfig(BaseModel):
     top_k: Optional[int] = 5
     user: Optional[str] = None
     password: Optional[str] = None
-    # config for similarity evaluation
-    threshold: Optional[Union[float, int]] = 0.6
     # config for llm
     llm_src: Optional[str] = 'openai'
     openai_model: Optional[str] = 'gpt-3.5-turbo'
@@ -44,7 +42,11 @@ class EnhancedQASearchConfig(BaseModel):
     ernie_secret_key: Optional[str] = None
     customize_llm: Optional[Any] = None
     customize_prompt: Optional[Any] = None
+    # config for rerank
+    rerank: Optional[bool] = False
+    rerank_model: Optional[str] = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
+    threshold: Optional[Union[float, int]] = 0.6
 
 _hf_models = ops.sentence_embedding.transformers().get_op().supported_model_names()
```
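The new default `rerank_model` is a sentence-transformers cross-encoder, which scores each (question, doc) pair jointly, unlike the bi-encoder embedding model used for the initial milvus retrieval. As an illustration of what it computes, independent of towhee's wrapper (how the operator maps these raw scores onto the 0.6 threshold, e.g. whether a sigmoid is applied first, is internal to the rerank operator):

```python
# Illustration of the default rerank model via sentence-transformers directly;
# this bypasses towhee and shows only the pairwise scoring a cross-encoder does.
from sentence_transformers import CrossEncoder

model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
scores = model.predict([
    ('what is milvus?', 'Milvus is an open-source vector database.'),
    ('what is milvus?', 'The weather is nice today.'),
])
# The on-topic pair receives a much higher relevance score than the off-topic one.
```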
```diff
@@ -131,15 +133,23 @@ def enhanced_qa_search_pipe(config):
         .map('embedding', 'result', search_milvus_op)
     )
 
-    # if config.similarity_evaluation:
-    if config.threshold:
+    if config.rerank:
+        p = (
+            p.map('result', 'docs', lambda x: [i[2] for i in x])
+            .map(('question', 'docs'), ('docs', 'score'), ops.rerank(config.rerank_model, config.threshold))
+            .map('docs', 'docs', lambda x: '\n'.join([i for i in x]))
+        )
+    elif config.threshold:
         sim_eval_op = _get_similarity_evaluation_op(config)
-        p = p.map('result', 'result', sim_eval_op)
+        p = (p.map('result', 'result', sim_eval_op)
+             .map('result', 'docs', lambda x: '\n'.join([i[2] for i in x]))
+        )
+    else:
+        p = p.map('result', 'docs', lambda x: '\n'.join([i[2] for i in x]))
 
     p = (
-        p.map('result', 'docs', lambda x: '\n'.join([i[2] for i in x]))
-        .map(('question', 'docs', 'history'), 'prompt', _get_prompt(config))
+        p.map(('question', 'docs', 'history'), 'prompt', _get_prompt(config))
         .map('prompt', 'answer', llm_op)
     )
     return p.output('answer')
```
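Note the resulting three-way branch: `rerank=True` takes the new `ops.rerank` path, a bare `threshold` falls back to the old similarity-evaluation filter, and otherwise the raw milvus hits are joined into `docs` unfiltered. A hedged usage sketch of the rewired pipeline follows; the `'question'`/`'history'` inputs come from the code above, while the `AutoPipes` entry point and the `'eqa-search'` name are assumptions:

```python
# Hedged usage sketch: input columns come from the pipeline above;
# the AutoPipes entry point and pipeline name are assumptions.
from towhee import AutoConfig, AutoPipes

config = AutoConfig.load_config('eqa-search')
config.rerank = True  # exercise the new ops.rerank branch
pipe = AutoPipes.pipeline('eqa-search', config=config)

answer = pipe('What is Towhee?', []).get()[0]  # second arg: empty chat history
```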
