# Source code for neo4j_graphrag.experimental.components.types

#  Copyright (c) "Neo4j"
#  Neo4j Sweden AB [https://neo4j.com]
#  #
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  #
#      https://www.apache.org/licenses/LICENSE-2.0
#  #
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
from __future__ import annotations

import uuid
from enum import Enum
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field, field_validator

from neo4j_graphrag.experimental.pipeline.component import DataModel


class DocumentInfo(DataModel):
    """Descriptor for a document loaded by a DataLoader.

    Attributes:
        path (str): Document path.
        metadata (Optional[Dict[str, str]]): Optional string-to-string
            metadata associated with this document; ``None`` by default.
        uid (str): Unique identifier for this document. When not supplied,
            a new UUID4 is generated and stored as a string.
    """

    path: str
    metadata: Optional[Dict[str, str]] = None
    # A factory (not a plain default) so each instance gets its own UUID.
    uid: str = Field(default_factory=lambda: str(uuid.uuid4()))

    @property
    def document_id(self) -> str:
        """Return the document identifier (read-only alias of ``uid``)."""
        return self.uid
class PdfDocument(DataModel):
    # Pairs a document's text with the DocumentInfo record describing it.
    # NOTE(review): presumably produced by a PDF loader component - confirm
    # against the loader implementation, which is not visible in this file.

    # Text content of the document.
    text: str
    # Path / metadata / uid record for the document the text belongs to.
    document_info: DocumentInfo
class TextChunk(BaseModel):
    """One piece of text produced by splitting a document with a text splitter.

    Attributes:
        text (str): The raw chunk text.
        index (int): The position of this chunk in the original document.
        metadata (Optional[dict[str, Any]]): Optional metadata associated
            with this chunk; ``None`` by default.
        uid (str): Unique identifier for this chunk. When not supplied,
            a new UUID4 is generated and stored as a string.
    """

    text: str
    index: int
    metadata: Optional[dict[str, Any]] = None
    # A factory (not a plain default) so each chunk gets its own UUID.
    uid: str = Field(default_factory=lambda: str(uuid.uuid4()))

    @property
    def chunk_id(self) -> str:
        """Return the chunk identifier (read-only alias of ``uid``)."""
        return self.uid
class TextChunks(DataModel):
    """Container for the text chunks returned from a text splitter.

    Attributes:
        chunks (list[TextChunk]): The text chunks, as a list.
    """

    chunks: list[TextChunk]
class Neo4jNode(BaseModel):
    """Model of a single Neo4j node.

    Attributes:
        id (str): The element ID of the node.
        label (str): The label of the node.
        properties (dict[str, Any]): Properties attached to the node, keyed
            by property name. Defaults to an empty dict.
        embedding_properties (Optional[dict[str, list[float]]]): Mapping from
            property name to embedding vector (a list of floats) attached to
            the node; ``None`` by default.
    """

    id: str
    label: str
    properties: dict[str, Any] = {}
    embedding_properties: Optional[dict[str, list[float]]] = None

    @field_validator("properties", "embedding_properties")
    @classmethod
    def check_for_id_properties(
        cls, v: Optional[dict[str, Any]]
    ) -> Optional[dict[str, Any]]:
        """Reject a property mapping that contains the key ``"id"``.

        Applied to both ``properties`` and ``embedding_properties``.

        Args:
            v: The mapping being validated; may be ``None`` or empty.

        Returns:
            The value unchanged when it is empty/``None`` or has no ``"id"`` key.

        Raises:
            TypeError: If ``"id"`` is one of the mapping's keys.
        """
        # Nothing to check for None or an empty mapping.
        if not v:
            return v
        if "id" in v:
            raise TypeError("'id' as a property name is not allowed")
        return v
class Neo4jRelationship(BaseModel):
    """Model of a single Neo4j relationship between two nodes.

    Attributes:
        start_node_id (str): The ID of the start node.
        end_node_id (str): The ID of the end node.
        type (str): The relationship type.
        properties (dict[str, Any]): Properties attached to the relationship,
            keyed by property name. Defaults to an empty dict.
        embedding_properties (Optional[dict[str, list[float]]]): Mapping from
            property name to embedding vector (a list of floats) attached to
            the relationship; ``None`` by default.
    """

    start_node_id: str
    end_node_id: str
    type: str
    properties: dict[str, Any] = {}
    embedding_properties: Optional[dict[str, list[float]]] = None
class Neo4jGraph(DataModel):
    """Model of a Neo4j graph as a set of nodes plus relationships.

    Attributes:
        nodes (list[Neo4jNode]): The nodes in the graph. Defaults to an
            empty list.
        relationships (list[Neo4jRelationship]): The relationships in the
            graph. Defaults to an empty list.
    """

    nodes: list[Neo4jNode] = []
    relationships: list[Neo4jRelationship] = []
class ResolutionStats(DataModel):
    # Counters describing a node-resolution run.
    # NOTE(review): the producer of these stats is not visible in this file;
    # field meanings below are taken from the field names - confirm.

    # Number of nodes that were candidates for resolution (required).
    number_of_nodes_to_resolve: int
    # Number of nodes created; None when not reported.
    number_of_created_nodes: Optional[int] = None


# Default node labels used by LexicalGraphConfig.
DEFAULT_DOCUMENT_NODE_LABEL = "Document"
DEFAULT_CHUNK_NODE_LABEL = "Chunk"

# Default relationship types used by LexicalGraphConfig.
DEFAULT_CHUNK_TO_DOCUMENT_RELATIONSHIP_TYPE = "FROM_DOCUMENT"
DEFAULT_NEXT_CHUNK_RELATIONSHIP_TYPE = "NEXT_CHUNK"
DEFAULT_NODE_TO_CHUNK_RELATIONSHIP_TYPE = "FROM_CHUNK"

# Default chunk property names used by LexicalGraphConfig.
DEFAULT_CHUNK_ID_PROPERTY = "id"
DEFAULT_CHUNK_INDEX_PROPERTY = "index"
DEFAULT_CHUNK_TEXT_PROPERTY = "text"
DEFAULT_CHUNK_EMBEDDING_PROPERTY = "embedding"
class LexicalGraphConfig(BaseModel):
    """Configuration of every label and property name in the lexical graph.

    Each field defaults to the matching module-level ``DEFAULT_*`` constant,
    except ``id_prefix``, which is deprecated and defaults to an empty string.
    """

    # Deprecated field, kept with an empty-string default.
    id_prefix: str = Field(default="", deprecated=True)

    # Node labels.
    document_node_label: str = DEFAULT_DOCUMENT_NODE_LABEL
    chunk_node_label: str = DEFAULT_CHUNK_NODE_LABEL

    # Relationship types.
    chunk_to_document_relationship_type: str = (
        DEFAULT_CHUNK_TO_DOCUMENT_RELATIONSHIP_TYPE
    )
    next_chunk_relationship_type: str = DEFAULT_NEXT_CHUNK_RELATIONSHIP_TYPE
    node_to_chunk_relationship_type: str = DEFAULT_NODE_TO_CHUNK_RELATIONSHIP_TYPE

    # Property names on chunk nodes.
    chunk_id_property: str = DEFAULT_CHUNK_ID_PROPERTY
    chunk_index_property: str = DEFAULT_CHUNK_INDEX_PROPERTY
    chunk_text_property: str = DEFAULT_CHUNK_TEXT_PROPERTY
    chunk_embedding_property: str = DEFAULT_CHUNK_EMBEDDING_PROPERTY

    @property
    def lexical_graph_node_labels(self) -> tuple[str, ...]:
        """Return the document and chunk node labels, in that order."""
        return (self.document_node_label, self.chunk_node_label)
class GraphResult(DataModel):
    # Bundles a graph with the lexical graph configuration that goes with it.

    # The nodes and relationships.
    graph: Neo4jGraph
    # Labels / property names of the lexical graph.
    config: LexicalGraphConfig


class SchemaEnforcementMode(str, Enum):
    """String-valued enumeration of schema enforcement modes.

    Members are also ``str`` instances, so each compares equal to its value.
    NOTE(review): how each mode is applied is decided by the code that
    consumes this enum, which is not visible in this file.
    """

    NONE = "NONE"
    STRICT = "STRICT"