Merge branch 'main' into feat/end-user-oauth

# Conflicts:
#	web/app/components/app/configuration/config/agent/agent-tools/index.tsx
This commit is contained in:
zhsama
2025-12-09 17:41:01 +08:00
350 changed files with 9871 additions and 9515 deletions

View File

@ -19,7 +19,9 @@ from sqlalchemy.orm import Mapped, Session, mapped_column
from configs import dify_config
from core.rag.index_processor.constant.built_in_field import BuiltInField, MetadataDataSource
from core.rag.index_processor.constant.query_type import QueryType
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.tools.signature import sign_upload_file
from extensions.ext_storage import storage
from libs.uuid_utils import uuidv7
from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule
@ -76,6 +78,7 @@ class Dataset(Base):
pipeline_id = mapped_column(StringUUID, nullable=True)
chunk_structure = mapped_column(sa.String(255), nullable=True)
enable_api = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true"))
is_multimodal = mapped_column(sa.Boolean, nullable=False, server_default=db.text("false"))
@property
def total_documents(self):
@ -728,9 +731,7 @@ class DocumentSegment(Base):
created_by = mapped_column(StringUUID, nullable=False)
created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
updated_by = mapped_column(StringUUID, nullable=True)
updated_at: Mapped[datetime] = mapped_column(
DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp()
)
updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
indexing_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
completed_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
error = mapped_column(LongText, nullable=True)
@ -866,6 +867,47 @@ class DocumentSegment(Base):
return text
@property
def attachments(self) -> list[dict[str, Any]]:
# Use JOIN to fetch attachments in a single query instead of two separate queries
attachments_with_bindings = db.session.execute(
select(SegmentAttachmentBinding, UploadFile)
.join(UploadFile, UploadFile.id == SegmentAttachmentBinding.attachment_id)
.where(
SegmentAttachmentBinding.tenant_id == self.tenant_id,
SegmentAttachmentBinding.dataset_id == self.dataset_id,
SegmentAttachmentBinding.document_id == self.document_id,
SegmentAttachmentBinding.segment_id == self.id,
)
).all()
if not attachments_with_bindings:
return []
attachment_list = []
for _, attachment in attachments_with_bindings:
upload_file_id = attachment.id
nonce = os.urandom(16).hex()
timestamp = str(int(time.time()))
data_to_sign = f"image-preview|{upload_file_id}|{timestamp}|{nonce}"
secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b""
sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest()
encoded_sign = base64.urlsafe_b64encode(sign).decode()
params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}"
reference_url = dify_config.CONSOLE_API_URL or ""
base_url = f"{reference_url}/files/{upload_file_id}/image-preview"
source_url = f"{base_url}?{params}"
attachment_list.append(
{
"id": attachment.id,
"name": attachment.name,
"size": attachment.size,
"extension": attachment.extension,
"mime_type": attachment.mime_type,
"source_url": source_url,
}
)
return attachment_list
class ChildChunk(Base):
__tablename__ = "child_chunks"
@ -963,6 +1005,38 @@ class DatasetQuery(TypeBase):
DateTime, nullable=False, server_default=sa.func.current_timestamp(), init=False
)
@property
def queries(self) -> list[dict[str, Any]]:
try:
queries = json.loads(self.content)
if isinstance(queries, list):
for query in queries:
if query["content_type"] == QueryType.IMAGE_QUERY:
file_info = db.session.query(UploadFile).filter_by(id=query["content"]).first()
if file_info:
query["file_info"] = {
"id": file_info.id,
"name": file_info.name,
"size": file_info.size,
"extension": file_info.extension,
"mime_type": file_info.mime_type,
"source_url": sign_upload_file(file_info.id, file_info.extension),
}
else:
query["file_info"] = None
return queries
else:
return [queries]
except JSONDecodeError:
return [
{
"content_type": QueryType.TEXT_QUERY,
"content": self.content,
"file_info": None,
}
]
class DatasetKeywordTable(TypeBase):
__tablename__ = "dataset_keyword_tables"
@ -1470,3 +1544,25 @@ class PipelineRecommendedPlugin(TypeBase):
onupdate=func.current_timestamp(),
init=False,
)
class SegmentAttachmentBinding(Base):
__tablename__ = "segment_attachment_bindings"
__table_args__ = (
sa.PrimaryKeyConstraint("id", name="segment_attachment_binding_pkey"),
sa.Index(
"segment_attachment_binding_tenant_dataset_document_segment_idx",
"tenant_id",
"dataset_id",
"document_id",
"segment_id",
),
sa.Index("segment_attachment_binding_attachment_idx", "attachment_id"),
)
id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuidv7()))
tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
dataset_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
document_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
segment_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
attachment_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
created_at: Mapped[datetime] = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())