Browse Source
Support more suffix of docs
Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
1 changed file with
11 additions and
2 deletions
-
osschat_insert.py
|
|
@ -67,7 +67,16 @@ def _get_embedding_op(config): |
|
|
|
return False, ops.sentence_embedding.openai(model_name=config.embedding_model, api_key=config.openai_api_key) |
|
|
|
raise RuntimeError('Unknown model: [%s], only support: %s' % (config.embedding_model, _hf_models + _sbert_models + _openai_models)) |
|
|
|
|
|
|
|
|
|
|
|
def data_loader(path):
    """Load a document with the loader operator selected by its suffix.

    The loader is chosen from the end of ``path``:

    * ``pdf``          -> ``ops.data_loader.pdf_loader()``
    * ``xls``/``xlsx`` -> ``ops.data_loader.excel_loader()``
    * ``ppt``          -> ``ops.data_loader.powerpoint_loader()``
    * anything else    -> ``ops.text_loader()``

    Args:
        path: Location of the document, as a string.

    Returns:
        Whatever the selected loader operator returns when called with
        ``path``.
    """
    # Match case-insensitively so that e.g. 'REPORT.PDF' is not routed to the
    # plain-text loader; the untouched ``path`` is still what gets loaded.
    lowered = path.lower()

    # Suffixes are matched without a leading dot.
    if lowered.endswith('pdf'):
        op = ops.data_loader.pdf_loader()
    elif lowered.endswith(('xls', 'xlsx')):
        # 'xlsx' is the modern Excel extension (previously misspelled 'xslx',
        # which sent every .xlsx file to the text loader).
        op = ops.data_loader.excel_loader()
    elif lowered.endswith('ppt'):
        # NOTE(review): 'pptx' is not matched here and falls through to the
        # text loader -- confirm whether powerpoint_loader accepts it.
        op = ops.data_loader.powerpoint_loader()
    else:
        op = ops.text_loader()
    return op(path)
|
|
|
|
|
|
|
@AutoPipes.register |
|
|
|
def osschat_insert_pipe(config): |
|
|
@ -91,7 +100,7 @@ def osschat_insert_pipe(config): |
|
|
|
|
|
|
|
p = ( |
|
|
|
pipe.input('doc', 'project_name') |
|
|
|
.map('doc', 'text', ops.text_loader()) |
|
|
|
.map('doc', 'text', data_loader) |
|
|
|
.flat_map('text', 'sentence', text_split_op) |
|
|
|
.map('sentence', 'embedding', sentence_embedding_op, config=sentence_embedding_config) |
|
|
|
) |
|
|
|