diff --git a/README.md b/README.md index 96229e0..f16b83f 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ from towhee import DataLoader, pipe, ops p = ( pipe.input('url') .map('url', 'text', ops.text_loader()) - .flat_map('text', 'sentence', text_split_op) + .flat_map('text', 'sentence', ops.text_splitter()) .map('sentence', 'embedding', ops.sentence_embedding.transformers(model_name='all-MiniLM-L6-v2')) .map('embedding', 'embedding', ops.towhee.np_normalize()) .output('embedding') @@ -31,11 +31,11 @@ p = ( # table cols: id, image_path, label -for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='*html')): +for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='html')): print(p(data).to_list(kv_format=True)) # batch -for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='*html'), batch_size=10): +for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='html'), batch_size=10): p.batch(data) ``` diff --git a/docs_reader.py b/docs_reader.py index c9191c9..4c8aecf 100644 --- a/docs_reader.py +++ b/docs_reader.py @@ -26,7 +26,7 @@ class DocsReader(PyOperator): def __init__(self, page_prefix: str, index_page: str = None, - include: Union[List[str], str] = '.*', + include: Union[List[str], str] = '', exclude: Union[List[str], str] = None ):