Merge remote-tracking branch 'origin/main' into feat/queue-based-graph-engine

-LAN-
2025-09-03 11:56:05 +08:00
83 changed files with 2377 additions and 2351 deletions

View File

@@ -228,7 +228,7 @@ class AnalyticdbVectorBySql:
)
documents = []
for record in cur:
- id, vector, score, page_content, metadata = record
+ _, vector, score, page_content, metadata = record
if score >= score_threshold:
metadata["score"] = score
doc = Document(
@@ -260,7 +260,7 @@ class AnalyticdbVectorBySql:
)
documents = []
for record in cur:
- id, vector, page_content, metadata, score = record
+ _, vector, page_content, metadata, score = record
metadata["score"] = score
doc = Document(
page_content=page_content,

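Note: both hunks above replace an unused id binding with the conventional _ placeholder when unpacking a row tuple. A minimal sketch of the pattern, with an invented record:

record = ("doc-1", [0.1, 0.2, 0.3], 0.87, "hello world", {"source": "test"})
_, vector, score, page_content, metadata = record  # the id field is never read
if score >= 0.5:
    metadata["score"] = score
print(page_content, metadata)

Besides silencing unused-variable warnings, this stops the local name from shadowing the built-in id().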
View File

@@ -701,7 +701,7 @@ class ClickzettaVector(BaseVector):
len(data_rows),
vector_dimension,
)
- except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
+ except (RuntimeError, ValueError, TypeError, ConnectionError):
logger.exception("Parameterized SQL execution failed for %d documents", len(data_rows))
logger.exception("SQL template: %s", insert_sql)
logger.exception("Sample data row: %s", data_rows[0] if data_rows else "None")
@@ -787,7 +787,7 @@ class ClickzettaVector(BaseVector):
document_ids_filter = kwargs.get("document_ids_filter")
# Handle filter parameter from canvas (workflow)
- filter_param = kwargs.get("filter", {})
+ _ = kwargs.get("filter", {})
# Build filter clause
filter_clauses = []
@@ -879,7 +879,7 @@ class ClickzettaVector(BaseVector):
document_ids_filter = kwargs.get("document_ids_filter")
# Handle filter parameter from canvas (workflow)
- filter_param = kwargs.get("filter", {})
+ _ = kwargs.get("filter", {})
# Build filter clause
filter_clauses = []
@@ -938,7 +938,7 @@ class ClickzettaVector(BaseVector):
metadata = {}
else:
metadata = {}
- except (json.JSONDecodeError, TypeError) as e:
+ except (json.JSONDecodeError, TypeError):
logger.exception("JSON parsing failed")
# Fallback: extract document_id with regex
@@ -956,7 +956,7 @@ class ClickzettaVector(BaseVector):
metadata["score"] = 1.0 # Clickzetta doesn't provide relevance scores
doc = Document(page_content=row[1], metadata=metadata)
documents.append(doc)
- except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
+ except (RuntimeError, ValueError, TypeError, ConnectionError):
logger.exception("Full-text search failed")
# Fallback to LIKE search if full-text search fails
return self._search_by_like(query, **kwargs)
@@ -978,7 +978,7 @@ class ClickzettaVector(BaseVector):
document_ids_filter = kwargs.get("document_ids_filter")
# Handle filter parameter from canvas (workflow)
- filter_param = kwargs.get("filter", {})
+ _ = kwargs.get("filter", {})
# Build filter clause
filter_clauses = []

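Note: the except-clause edits in this file work because logging.Logger.exception reads the active exception from the interpreter itself, so the as e binding is only needed when e is actually referenced. A minimal, runnable sketch:

import json
import logging

logging.basicConfig()
logger = logging.getLogger(__name__)

def parse_metadata(raw: str) -> dict:
    try:
        return json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        # No `as e` needed: logger.exception picks up the active
        # exception and appends its traceback to the log record.
        logger.exception("JSON parsing failed")
        return {}

parse_metadata("{not json}")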
View File

@@ -212,10 +212,10 @@ class CouchbaseVector(BaseVector):
documents_to_insert = [
{"text": text, "embedding": vector, "metadata": metadata}
- for id, text, vector, metadata in zip(uuids, texts, embeddings, metadatas)
+ for _, text, vector, metadata in zip(uuids, texts, embeddings, metadatas)
]
for doc, id in zip(documents_to_insert, uuids):
- result = self._scope.collection(self._collection_name).upsert(id, doc)
+ _ = self._scope.collection(self._collection_name).upsert(id, doc)
doc_ids.extend(uuids)
@@ -241,7 +241,7 @@ class CouchbaseVector(BaseVector):
"""
try:
self._cluster.query(query, named_parameters={"doc_ids": ids}).execute()
- except Exception as e:
+ except Exception:
logger.exception("Failed to delete documents, ids: %s", ids)
def delete_by_document_id(self, document_id: str):

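Note: assigning a never-read call result to _ makes the discard explicit, which is what the upsert change above does. A small self-contained sketch (this upsert function is a stand-in for the Couchbase collection call, whose MutationResult this code path ignores):

def upsert(key: str, doc: dict) -> dict:
    # stand-in for collection.upsert(); only its side effect matters here
    return {"key": key, "cas": 12345}

_ = upsert("doc-1", {"text": "hello"})  # result intentionally discarded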
View File

@@ -99,7 +99,7 @@ class MatrixoneVector(BaseVector):
return client
try:
client.create_full_text_index()
- except Exception as e:
+ except Exception:
logger.exception("Failed to create full text index")
redis_client.set(collection_exist_cache_key, 1, ex=3600)
return client

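Note: for context, this hunk sits inside a cache-guarded initializer: a Redis key with a one-hour TTL records that the collection was already set up, so repeat calls skip the expensive path. A hedged sketch of that shape (ensure_collection and its arguments are stand-ins, not the repo's actual signature):

import logging

logger = logging.getLogger(__name__)

def ensure_collection(redis_client, client, collection_exist_cache_key: str):
    if redis_client.get(collection_exist_cache_key):
        return client  # initialized recently; skip setup
    try:
        client.create_full_text_index()
    except Exception:
        logger.exception("Failed to create full text index")
    redis_client.set(collection_exist_cache_key, 1, ex=3600)  # re-verify after an hour
    return client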
View File

@@ -197,7 +197,7 @@ class OpenSearchVector(BaseVector):
try:
response = self._client.search(index=self._collection_name.lower(), body=query)
- except Exception as e:
+ except Exception:
logger.exception("Error executing vector search, query: %s", query)
raise

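Note: unlike the handlers elsewhere in this commit that log and fall back, this one logs and then re-raises, so the caller still sees the failure; a bare raise re-raises the active exception without flattening its traceback. Sketch (client stands in for the OpenSearch client):

import logging

logger = logging.getLogger(__name__)

def vector_search(client, index: str, query: dict) -> dict:
    try:
        return client.search(index=index, body=query)
    except Exception:
        # record the traceback at this layer, then propagate unchanged
        logger.exception("Error executing vector search, query: %s", query)
        raise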
View File

@@ -71,7 +71,7 @@ class TableStoreVector(BaseVector):
table_result = result.get_result_by_table(self._table_name)
for item in table_result:
if item.is_ok and item.row:
- kv = {k: v for k, v, t in item.row.attribute_columns}
+ kv = {k: v for k, v, _ in item.row.attribute_columns}
docs.append(
Document(
page_content=kv[Field.CONTENT_KEY.value], metadata=json.loads(kv[Field.METADATA_KEY.value])

View File

@@ -107,7 +107,7 @@ class Blob(BaseModel):
Blob instance
"""
if mime_type is None and guess_type:
- _mimetype = mimetypes.guess_type(path)[0] if guess_type else None
+ _mimetype = mimetypes.guess_type(path)[0]
else:
_mimetype = mime_type
# We do not load the data immediately, instead we treat the blob as a

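Note: the removed `if guess_type else None` suffix was dead code, because the enclosing branch already requires guess_type to be truthy. A runnable sketch with an invented path:

import mimetypes

mime_type = None
guess_type = True

if mime_type is None and guess_type:
    # guess_type is already known to be truthy here, so no inner conditional
    _mimetype = mimetypes.guess_type("report.pdf")[0]
else:
    _mimetype = mime_type

print(_mimetype)  # -> "application/pdf"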
View File

@@ -23,7 +23,7 @@ class UnstructuredWordExtractor(BaseExtractor):
unstructured_version = tuple(int(x) for x in __unstructured_version__.split("."))
# check the file extension
try:
- import magic  # noqa: F401
+ import magic  # noqa: F401  # pyright: ignore[reportUnusedImport]
is_doc = detect_filetype(self._file_path) == FileType.DOC
except ImportError:

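Note: the import exists only to probe whether python-magic is installed before attempting filetype detection; the added comment silences pyright's unused-import diagnostic alongside flake8's F401. The general shape of the probe (HAS_MAGIC is a hypothetical flag; the real code runs detect_filetype inside the try):

try:
    import magic  # noqa: F401  # pyright: ignore[reportUnusedImport]
    HAS_MAGIC = True
except ImportError:
    HAS_MAGIC = False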
View File

@@ -113,7 +113,7 @@ class QAIndexProcessor(BaseIndexProcessor):
# Skip the first row
df = pd.read_csv(file)
text_docs = []
- for index, row in df.iterrows():
+ for _, row in df.iterrows():
data = Document(page_content=row.iloc[0], metadata={"answer": row.iloc[1]})
text_docs.append(data)
if len(text_docs) == 0:
@@ -183,7 +183,7 @@ class QAIndexProcessor(BaseIndexProcessor):
qa_document.metadata["doc_hash"] = hash
qa_documents.append(qa_document)
format_documents.extend(qa_documents)
- except Exception as e:
+ except Exception:
logger.exception("Failed to format qa document")
all_qa_documents.extend(format_documents)

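Note: pandas' DataFrame.iterrows() yields (index, row) pairs, so when only the row is needed the index unpacks to _. A runnable sketch with an invented two-column QA frame:

import pandas as pd

df = pd.DataFrame({"question": ["What is RAG?"], "answer": ["Retrieval-augmented generation."]})
for _, row in df.iterrows():  # the index is unused
    print(row.iloc[0], "->", row.iloc[1])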
View File

@@ -9,7 +9,6 @@ from typing import Any, Optional, Union, cast
from flask import Flask, current_app
from sqlalchemy import Float, and_, or_, select, text
from sqlalchemy import cast as sqlalchemy_cast
- from sqlalchemy.orm import Session
from core.app.app_config.entities import (
DatasetEntity,
@@ -526,7 +525,7 @@ class DatasetRetrieval:
)
child_chunk = db.session.scalar(child_chunk_stmt)
if child_chunk:
- segment = (
+ _ = (
db.session.query(DocumentSegment)
.where(DocumentSegment.id == child_chunk.segment_id)
.update(
@@ -593,9 +592,8 @@
metadata_condition: Optional[MetadataCondition] = None,
):
with flask_app.app_context():
- with Session(db.engine) as session:
- dataset_stmt = select(Dataset).where(Dataset.id == dataset_id)
- dataset = session.scalar(dataset_stmt)
+ dataset_stmt = select(Dataset).where(Dataset.id == dataset_id)
+ dataset = db.session.scalar(dataset_stmt)
if not dataset:
return []
@@ -987,7 +985,7 @@
)
# handle invoke result
- result_text, usage = self._handle_invoke_result(invoke_result=invoke_result)
+ result_text, _ = self._handle_invoke_result(invoke_result=invoke_result)
result_text_json = parse_and_check_json_markdown(result_text, [])
automatic_metadata_filters = []
@@ -1002,7 +1000,7 @@
"condition": item.get("comparison_operator"),
}
)
- except Exception as e:
+ except Exception:
return None
return automatic_metadata_filters

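Note: the larger change in this file drops the nested Session(db.engine) context manager and routes the query through the shared db.session. A minimal runnable sketch of that shape in plain SQLAlchemy with in-memory SQLite (the Dataset model is an invented stand-in, and scoped_session plays the role of Flask-SQLAlchemy's db.session):

from sqlalchemy import String, create_engine, select
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, scoped_session, sessionmaker

class Base(DeclarativeBase):
    pass

class Dataset(Base):
    __tablename__ = "datasets"
    id: Mapped[str] = mapped_column(String, primary_key=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
session = scoped_session(sessionmaker(engine))  # stands in for db.session

session.add(Dataset(id="ds-1"))
session.commit()

dataset_stmt = select(Dataset).where(Dataset.id == "ds-1")
dataset = session.scalar(dataset_stmt)  # no nested Session needed
print(dataset.id if dataset else [])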
View File

@@ -19,5 +19,5 @@ class StructuredChatOutputParser:
return ReactAction(response["action"], response.get("action_input", {}), text)
else:
return ReactFinish({"output": text}, text)
- except Exception as e:
+ except Exception:
raise ValueError(f"Could not parse LLM output: {text}")

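Note: dropping as e here loses nothing diagnostically: a raise inside an except block implicitly chains the original error as __context__, so the parse failure still shows up in the traceback. A runnable sketch:

import json

def parse_react_output(text: str) -> dict:
    try:
        return json.loads(text)
    except Exception:
        raise ValueError(f"Could not parse LLM output: {text}")

try:
    parse_react_output("not json")
except ValueError as err:
    print(err, "| chained from:", type(err.__context__).__name__)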
View File

@@ -38,5 +38,5 @@ class FunctionCallMultiDatasetRouter:
# get retrieval model config
return result.message.tool_calls[0].function.name
return None
- except Exception as e:
+ except Exception:
return None

View File

@@ -77,7 +77,7 @@ class ReactMultiDatasetRouter:
user_id=user_id,
tenant_id=tenant_id,
)
- except Exception as e:
+ except Exception:
return None
def _react_invoke(
@@ -120,7 +120,7 @@
memory=None,
model_config=model_config,
)
- result_text, usage = self._invoke_llm(
+ result_text, _ = self._invoke_llm(
completion_param=model_config.parameters,
model_instance=model_instance,
prompt_messages=prompt_messages,