logo
Browse Source

Support more suffix of docs

Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
Jael Gu 2 years ago
parent
commit
8bdb6fe08f
  1. 13
      osschat_insert.py

13
osschat_insert.py

@ -67,7 +67,16 @@ def _get_embedding_op(config):
return False, ops.sentence_embedding.openai(model_name=config.embedding_model, api_key=config.openai_api_key)
raise RuntimeError('Unknown model: [%s], only support: %s' % (config.embedding_model, _hf_models + _sbert_models + _openai_models))
def data_loader(path):
    """Load the document at ``path`` with a loader picked from its suffix.

    The suffix is compared case-insensitively, so ``REPORT.PDF`` is routed
    the same way as ``report.pdf``.  Routing:

    * ``pdf``            -> ``ops.data_loader.pdf_loader``
    * ``xls`` / ``xlsx`` -> ``ops.data_loader.excel_loader``
    * ``ppt`` / ``pptx`` -> ``ops.data_loader.powerpoint_loader``
    * anything else      -> ``ops.text_loader``

    Args:
        path: Location of the document, as a string.

    Returns:
        Whatever the selected loader operator returns when called with
        ``path`` (the original, un-lowercased value).
    """
    # Lower-case only the copy used for matching; the loader still receives
    # the caller's original path.
    lowered = path.lower()
    if lowered.endswith('pdf'):
        op = ops.data_loader.pdf_loader()
    elif lowered.endswith(('xls', 'xlsx')):
        # 'xlsx' (not 'xslx') is the Office Open XML spreadsheet suffix.
        op = ops.data_loader.excel_loader()
    elif lowered.endswith(('ppt', 'pptx')):
        # 'pptx' must be listed explicitly: a '.pptx' name does not end
        # with 'ppt'.
        op = ops.data_loader.powerpoint_loader()
    else:
        # Unrecognised suffixes fall back to the generic text loader.
        op = ops.text_loader()
    return op(path)
@AutoPipes.register
def osschat_insert_pipe(config):
@ -91,7 +100,7 @@ def osschat_insert_pipe(config):
p = (
pipe.input('doc', 'project_name')
.map('doc', 'text', ops.text_loader())
.map('doc', 'text', data_loader)
.flat_map('text', 'sentence', text_split_op)
.map('sentence', 'embedding', sentence_embedding_op, config=sentence_embedding_config)
)

Loading…
Cancel
Save