|
@ -22,7 +22,7 @@ from towhee import DataLoader, pipe, ops |
|
|
p = ( |
|
|
p = ( |
|
|
pipe.input('url') |
|
|
pipe.input('url') |
|
|
.map('url', 'text', ops.text_loader()) |
|
|
.map('url', 'text', ops.text_loader()) |
|
|
.flat_map('text', 'sentence', text_split_op) |
|
|
|
|
|
|
|
|
.flat_map('text', 'sentence', ops.text_splitter()) |
|
|
.map('sentence', 'embedding', ops.sentence_embedding.transformers(model_name='all-MiniLM-L6-v2')) |
|
|
.map('sentence', 'embedding', ops.sentence_embedding.transformers(model_name='all-MiniLM-L6-v2')) |
|
|
.map('embedding', 'embedding', ops.towhee.np_normalize()) |
|
|
.map('embedding', 'embedding', ops.towhee.np_normalize()) |
|
|
.output('embedding') |
|
|
.output('embedding') |
|
@ -31,11 +31,11 @@ p = ( |
|
|
|
|
|
|
|
|
# table cols: id, image_path, label |
|
|
# table cols: id, image_path, label |
|
|
|
|
|
|
|
|
for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='*html')): |
|
|
|
|
|
|
|
|
for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='html')): |
|
|
print(p(data).to_list(kv_format=True)) |
|
|
print(p(data).to_list(kv_format=True)) |
|
|
|
|
|
|
|
|
# batch |
|
|
# batch |
|
|
for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='*html'), batch_size=10): |
|
|
|
|
|
|
|
|
for data in DataLoader(ops.data_source.readthedocs('https://towhee.readthedocs.io/en/latest/', include='html'), batch_size=10): |
|
|
p.batch(data) |
|
|
p.batch(data) |
|
|
``` |
|
|
``` |
|
|
|
|
|
|
|
|