logo
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Readme
Files and versions

15 lines
533 B

from towhee.operator import PyOperator
from typing import List
from langchain.text_splitter import RecursiveCharacterTextSplitter
class TextSpliter(PyOperator):
    """Towhee operator that splits one string into a list of text chunks.

    The work is delegated to langchain's ``RecursiveCharacterTextSplitter``.
    Only ``chunk_size`` is forwarded to it; every other splitter option is
    left at the library default.
    """

    def __init__(self, chunk_size: int = 300):
        """Create the underlying splitter.

        Args:
            chunk_size: Value passed to ``RecursiveCharacterTextSplitter`` as
                its ``chunk_size`` argument.
        """
        # Run the base-class constructor so that whatever state PyOperator
        # sets up is present on this instance before it is used.
        super().__init__()
        self.splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size)

    def __call__(self, data: str) -> List[str]:
        """Split *data* and return the chunk texts in their original order.

        Args:
            data: The text to split.

        Returns:
            A list of chunk strings produced by the splitter.
        """
        # split_text returns the chunk strings directly. The previous
        # create_documents + split_documents sequence ran the splitter a
        # second time over chunks that were already within chunk_size, and
        # wrapped every chunk in a Document only to unwrap it again.
        return list(self.splitter.split_text(data))