logo
Browse Source

Update

Signed-off-by: junjie.jiang <junjie.jiang@zilliz.com>
main
junjie.jiang 12 months ago
parent
commit
fc9d0525bd
  1. 6
      README.md
  2. 2
      docs_reader.py

6
README.md

@@ -22,7 +22,7 @@ from towhee import DataLoader, pipe, ops
p = (
pipe.input('url')
.map('url', 'text', ops.text_loader())
-.flat_map('text', 'sentence', text_split_op)
+.flat_map('text', 'sentence', ops.text_splitter())
.map('sentence', 'embedding', ops.sentence_embedding.transformers(model_name='all-MiniLM-L6-v2'))
.map('embedding', 'embedding', ops.towhee.np_normalize())
.output('embedding')
@@ -31,11 +31,11 @@ p = (
# table cols: id, image_path, label
-for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='*html')):
+for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='html')):
print(p(data).to_list(kv_format=True))
# batch
-for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='*html'), batch_size=10):
+for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='html'), batch_size=10):
p.batch(data)
```

2
docs_reader.py

@@ -26,7 +26,7 @@ class DocsReader(PyOperator):
def __init__(self,
page_prefix: str,
index_page: str = None,
-include: Union[List[str], str] = '.*',
+include: Union[List[str], str] = '',
exclude: Union[List[str], str] = None
):

Loading…
Cancel
Save