当前位置: 首页 > news > 正文

custom_document

from llama_index.core import Document
from typing import Dict, Any, Optional, List
from pydantic import Field
import json


class CustomDocument(Document):
    """Document subclass that adds a type, a priority and free-form tags.

    Three extra fields are declared on top of those inherited from
    ``Document``, plus two helpers: one to append a tag and one to build a
    summary dictionary of the document.
    """

    # Additional fields declared by this subclass.
    document_type: str = Field(default="general", description="文档类型")
    priority: int = Field(default=1, description="处理优先级")
    custom_tags: List[str] = Field(default_factory=list, description="自定义标签")

    def __init__(self, **data: Any):
        """Create the document from keyword arguments.

        Args:
            **data: Field values; forwarded unchanged to the parent
                initializer.
        """
        # Call the parent initializer, which handles backward compatibility.
        super().__init__(**data)

    def add_custom_tag(self, tag: str) -> None:
        """Append ``tag`` to ``custom_tags``.

        The tag is appended as-is; no duplicate check is performed.

        Args:
            tag: The tag to add.
        """
        self.custom_tags.append(tag)

    def get_document_info(self) -> Dict[str, Any]:
        """Return a dictionary summarising this document.

        Returns:
            A dict with the keys ``"id"``, ``"type"``, ``"priority"``,
            ``"tags"``, ``"metadata"`` and ``"text_length"``. ``"tags"`` and
            ``"metadata"`` are the document's own objects, not copies.
        """
        return {
            "id": self.id_,
            "type": self.document_type,
            "priority": self.priority,
            "tags": self.custom_tags,
            "metadata": self.metadata,
            # Falsy text (e.g. None or "") is reported as length 0.
            "text_length": len(self.text) if self.text else 0,
        }


# Create a custom document instance
custom_doc = CustomDocument(
    text="这是文档内容...",
    metadata={"source": "内部文档", "author": "张三"},
    document_type="report",
    priority=5,
    custom_tags=["重要", "待审核"],
)

# Use the custom methods: append one more tag, then print the summary dict.
custom_doc.add_custom_tag("技术文档")
info = custom_doc.get_document_info()
print(info)