Source code for neo4j_genai.experimental.components.text_splitters.langchain

#  Copyright (c) "Neo4j"
#  Neo4j Sweden AB [https://neo4j.com]
#  #
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  #
#      https://www.apache.org/licenses/LICENSE-2.0
#  #
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
from __future__ import annotations

from langchain_text_splitters import TextSplitter as LangChainTextSplitter

from neo4j_genai.experimental.components.text_splitters.base import TextSplitter
from neo4j_genai.experimental.components.types import TextChunk, TextChunks


[docs] class LangChainTextSplitterAdapter(TextSplitter): """Adapter for LangChain TextSplitters. Allows instances of this class to be used in the knowledge graph builder pipeline. Args: text_splitter (LangChainTextSplitter): An instance of LangChain's TextSplitter class. Example: .. code-block:: python from langchain_text_splitters import RecursiveCharacterTextSplitter from neo4j_genai.experimental.components.text_splitters.langchain import LangChainTextSplitterAdapter from neo4j_genai.experimental.pipeline import Pipeline pipeline = Pipeline() text_splitter = LangChainTextSplitterAdapter(RecursiveCharacterTextSplitter()) pipeline.add_component(text_splitter, "text_splitter") """ def __init__(self, text_splitter: LangChainTextSplitter) -> None: self.text_splitter = text_splitter
[docs] async def run(self, text: str) -> TextChunks: """ Splits text into chunks. Args: text (str): The text to split. Returns: TextChunks: The text split into chunks. """ chunks = self.text_splitter.split_text(text) return TextChunks( chunks=[ TextChunk(text=chunk, index=index) for index, chunk in enumerate(chunks) ] )