towhee
/
text-splitter
copied
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Readme
Files and versions
15 lines
533 B
15 lines
533 B
1 year ago
|
from towhee.operator import PyOperator
|
||
|
from typing import List
|
||
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||
|
|
||
|
|
||
|
class TextSpliter(PyOperator):
    '''Split a text string into a list of smaller text chunks.

    The actual splitting is delegated to langchain's
    ``RecursiveCharacterTextSplitter``, created once per operator instance.

    Args:
        chunk_size: Value forwarded as ``chunk_size`` to the underlying
            splitter. Defaults to 300.
    '''

    def __init__(self, chunk_size: int = 300):
        # Run the base-class initializer first so that any state the
        # PyOperator base sets up exists on this instance as well.
        super().__init__()
        self.splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size)

    def __call__(self, data: str) -> List[str]:
        '''Split ``data`` and return the text of every resulting chunk.

        Args:
            data: The text to split.

        Returns:
            A list with the ``page_content`` of each document produced by
            the splitter, converted to ``str``.
        '''
        # Wrap the raw text into document objects, then run the splitter's
        # document-level pass over them.
        # NOTE(review): create_documents presumably already chunks the text,
        # which would make split_documents a redundant second pass -- confirm
        # against the langchain version in use before simplifying.
        texts = self.splitter.create_documents([data])
        docs = self.splitter.split_documents(texts)
        return [str(doc.page_content) for doc in docs]
|