from typing import List

from langchain.text_splitter import RecursiveCharacterTextSplitter
from towhee.operator import PyOperator


class TextSpliter(PyOperator):
    '''Split a text string into a list of chunk strings.

    Wraps a ``RecursiveCharacterTextSplitter`` so it can be used as a
    towhee operator: calling the instance with a string returns the
    text of every chunk the splitter produces.
    '''

    def __init__(self, chunk_size: int = 300):
        '''Create the operator.

        Args:
            chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
                its ``chunk_size`` argument.
        '''
        # Run the base operator's constructor so that any state it sets up
        # exists on this instance before the operator is used.
        super().__init__()
        self.splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size)

    def __call__(self, data: str) -> List[str]:
        '''Split ``data`` and return the chunk texts.

        Args:
            data: The text to split.

        Returns:
            One string per document produced by the splitter, in the order
            the splitter returned them.
        '''
        documents = self.splitter.create_documents([data])
        # NOTE(review): create_documents appears to chunk the text already,
        # so this second pass is presumably redundant; kept to preserve the
        # existing output exactly -- confirm before simplifying.
        chunks = self.splitter.split_documents(documents)
        return [str(chunk.page_content) for chunk in chunks]