Mirror of https://github.com/langgenius/dify.git
Merge remote-tracking branch 'origin/main' into feat/queue-based-graph-engine
@@ -228,7 +228,7 @@ class AnalyticdbVectorBySql:
             )
             documents = []
             for record in cur:
-                id, vector, score, page_content, metadata = record
+                _, vector, score, page_content, metadata = record
                 if score >= score_threshold:
                     metadata["score"] = score
                     doc = Document(
@@ -260,7 +260,7 @@ class AnalyticdbVectorBySql:
             )
             documents = []
             for record in cur:
-                id, vector, page_content, metadata, score = record
+                _, vector, page_content, metadata, score = record
                 metadata["score"] = score
                 doc = Document(
                     page_content=page_content,
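Both AnalyticdbVectorBySql hunks swap an unused `id` binding for `_`, which avoids shadowing the built-in `id()` and marks the value as intentionally discarded. A minimal sketch of the convention, using a made-up record tuple shaped like the cursor rows above:

# Hypothetical row shaped like (id, vector, score, page_content, metadata).
record = ("doc-1", [0.1, 0.2], 0.87, "some text", {"document_id": "doc-1"})

# `_` signals an intentionally unused value and avoids shadowing built-in id().
_, vector, score, page_content, metadata = record

if score >= 0.5:
    metadata["score"] = score
    print(page_content, metadata)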
@@ -701,7 +701,7 @@ class ClickzettaVector(BaseVector):
                     len(data_rows),
                     vector_dimension,
                 )
-            except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
+            except (RuntimeError, ValueError, TypeError, ConnectionError):
                 logger.exception("Parameterized SQL execution failed for %d documents", len(data_rows))
                 logger.exception("SQL template: %s", insert_sql)
                 logger.exception("Sample data row: %s", data_rows[0] if data_rows else "None")
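Dropping `as e` is safe here because `logging.Logger.exception` already records the active exception and its traceback; the bound name was never used. A standalone sketch of the same pattern (the function and message are invented for illustration):

import logging

logging.basicConfig()
logger = logging.getLogger(__name__)


def parse_count(raw):
    try:
        return int(raw)
    except (ValueError, TypeError):
        # logger.exception logs at ERROR level and appends the current
        # traceback automatically, so binding the exception object is unnecessary.
        logger.exception("Failed to parse count from %r", raw)
        return None


parse_count("not-a-number")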
@@ -787,7 +787,7 @@ class ClickzettaVector(BaseVector):
         document_ids_filter = kwargs.get("document_ids_filter")

         # Handle filter parameter from canvas (workflow)
-        filter_param = kwargs.get("filter", {})
+        _ = kwargs.get("filter", {})

         # Build filter clause
         filter_clauses = []
@@ -879,7 +879,7 @@ class ClickzettaVector(BaseVector):
         document_ids_filter = kwargs.get("document_ids_filter")

         # Handle filter parameter from canvas (workflow)
-        filter_param = kwargs.get("filter", {})
+        _ = kwargs.get("filter", {})

         # Build filter clause
         filter_clauses = []
@@ -938,7 +938,7 @@ class ClickzettaVector(BaseVector):
                             metadata = {}
                     else:
                         metadata = {}
-                except (json.JSONDecodeError, TypeError) as e:
+                except (json.JSONDecodeError, TypeError):
                     logger.exception("JSON parsing failed")
                     # Fallback: extract document_id with regex

@@ -956,7 +956,7 @@ class ClickzettaVector(BaseVector):
                     metadata["score"] = 1.0  # Clickzetta doesn't provide relevance scores
                     doc = Document(page_content=row[1], metadata=metadata)
                     documents.append(doc)
-        except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
+        except (RuntimeError, ValueError, TypeError, ConnectionError):
            logger.exception("Full-text search failed")
            # Fallback to LIKE search if full-text search fails
            return self._search_by_like(query, **kwargs)
@@ -978,7 +978,7 @@ class ClickzettaVector(BaseVector):
         document_ids_filter = kwargs.get("document_ids_filter")

         # Handle filter parameter from canvas (workflow)
-        filter_param = kwargs.get("filter", {})
+        _ = kwargs.get("filter", {})

         # Build filter clause
         filter_clauses = []
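All three Clickzetta search methods read the `filter` kwarg but never apply it; assigning it to `_` keeps the read visible while telling readers and linters that the value is deliberately ignored. A toy illustration (the function and kwargs below are invented, not the real ClickzettaVector API):

def search_stub(query, **kwargs):
    document_ids_filter = kwargs.get("document_ids_filter")

    # The workflow canvas may pass `filter`; it is accepted but not applied,
    # and `_` makes that intent explicit.
    _ = kwargs.get("filter", {})

    results = ["doc-1", "doc-2"]
    if document_ids_filter:
        results = [r for r in results if r in document_ids_filter]
    return results


print(search_stub("hello", filter={"status": "published"}, document_ids_filter=["doc-2"]))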
@@ -212,10 +212,10 @@ class CouchbaseVector(BaseVector):

         documents_to_insert = [
             {"text": text, "embedding": vector, "metadata": metadata}
-            for id, text, vector, metadata in zip(uuids, texts, embeddings, metadatas)
+            for _, text, vector, metadata in zip(uuids, texts, embeddings, metadatas)
         ]
         for doc, id in zip(documents_to_insert, uuids):
-            result = self._scope.collection(self._collection_name).upsert(id, doc)
+            _ = self._scope.collection(self._collection_name).upsert(id, doc)

         doc_ids.extend(uuids)

@@ -241,7 +241,7 @@ class CouchbaseVector(BaseVector):
         """
         try:
             self._cluster.query(query, named_parameters={"doc_ids": ids}).execute()
-        except Exception as e:
+        except Exception:
             logger.exception("Failed to delete documents, ids: %s", ids)

     def delete_by_document_id(self, document_id: str):
@@ -99,7 +99,7 @@ class MatrixoneVector(BaseVector):
                 return client
             try:
                 client.create_full_text_index()
-            except Exception as e:
+            except Exception:
                 logger.exception("Failed to create full text index")
             redis_client.set(collection_exist_cache_key, 1, ex=3600)
             return client
@@ -197,7 +197,7 @@ class OpenSearchVector(BaseVector):

         try:
             response = self._client.search(index=self._collection_name.lower(), body=query)
-        except Exception as e:
+        except Exception:
             logger.exception("Error executing vector search, query: %s", query)
             raise

@@ -71,7 +71,7 @@ class TableStoreVector(BaseVector):
         table_result = result.get_result_by_table(self._table_name)
         for item in table_result:
             if item.is_ok and item.row:
-                kv = {k: v for k, v, t in item.row.attribute_columns}
+                kv = {k: v for k, v, _ in item.row.attribute_columns}
                 docs.append(
                     Document(
                         page_content=kv[Field.CONTENT_KEY.value], metadata=json.loads(kv[Field.METADATA_KEY.value])
@@ -107,7 +107,7 @@ class Blob(BaseModel):
             Blob instance
         """
         if mime_type is None and guess_type:
-            _mimetype = mimetypes.guess_type(path)[0] if guess_type else None
+            _mimetype = mimetypes.guess_type(path)[0]
         else:
             _mimetype = mime_type
         # We do not load the data immediately, instead we treat the blob as a
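The removed ternary was dead logic: on that branch `guess_type` is already known to be truthy, so `... if guess_type else None` always took the first arm. A condensed sketch of the simplified control flow (a standalone helper, not the actual Blob API):

import mimetypes
from typing import Optional


def resolve_mimetype(path: str, mime_type: Optional[str] = None, guess_type: bool = True) -> Optional[str]:
    if mime_type is None and guess_type:
        # guess_type is guaranteed truthy on this branch, so the old
        # `... if guess_type else None` ternary was redundant.
        return mimetypes.guess_type(path)[0]
    return mime_type


print(resolve_mimetype("report.pdf"))
print(resolve_mimetype("notes.md", mime_type="text/markdown", guess_type=False))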
@@ -23,7 +23,7 @@ class UnstructuredWordExtractor(BaseExtractor):
         unstructured_version = tuple(int(x) for x in __unstructured_version__.split("."))
         # check the file extension
         try:
-            import magic  # noqa: F401
+            import magic  # noqa: F401 # pyright: ignore[reportUnusedImport]

             is_doc = detect_filetype(self._file_path) == FileType.DOC
         except ImportError:
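The `import magic` line only probes whether python-magic is installed, so it trips both Ruff's F401 and pyright's unused-import check; the added `# pyright: ignore[reportUnusedImport]` silences the latter. The probe pattern in isolation looks roughly like this:

def libmagic_available() -> bool:
    try:
        import magic  # noqa: F401 # pyright: ignore[reportUnusedImport]
    except ImportError:
        # python-magic (and the underlying libmagic) is optional; callers can
        # fall back to extension-based detection when it is missing.
        return False
    return True


print(libmagic_available())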
@@ -113,7 +113,7 @@ class QAIndexProcessor(BaseIndexProcessor):
         # Skip the first row
         df = pd.read_csv(file)
         text_docs = []
-        for index, row in df.iterrows():
+        for _, row in df.iterrows():
             data = Document(page_content=row.iloc[0], metadata={"answer": row.iloc[1]})
             text_docs.append(data)
         if len(text_docs) == 0:
@@ -183,7 +183,7 @@ class QAIndexProcessor(BaseIndexProcessor):
                     qa_document.metadata["doc_hash"] = hash
                     qa_documents.append(qa_document)
                 format_documents.extend(qa_documents)
-            except Exception as e:
+            except Exception:
                 logger.exception("Failed to format qa document")

             all_qa_documents.extend(format_documents)
@@ -9,7 +9,6 @@ from typing import Any, Optional, Union, cast
 from flask import Flask, current_app
 from sqlalchemy import Float, and_, or_, select, text
 from sqlalchemy import cast as sqlalchemy_cast
-from sqlalchemy.orm import Session

 from core.app.app_config.entities import (
     DatasetEntity,
@@ -526,7 +525,7 @@ class DatasetRetrieval:
                 )
                 child_chunk = db.session.scalar(child_chunk_stmt)
                 if child_chunk:
-                    segment = (
+                    _ = (
                         db.session.query(DocumentSegment)
                         .where(DocumentSegment.id == child_chunk.segment_id)
                         .update(
@@ -593,9 +592,8 @@ class DatasetRetrieval:
         metadata_condition: Optional[MetadataCondition] = None,
     ):
         with flask_app.app_context():
-            with Session(db.engine) as session:
-                dataset_stmt = select(Dataset).where(Dataset.id == dataset_id)
-                dataset = session.scalar(dataset_stmt)
+            dataset_stmt = select(Dataset).where(Dataset.id == dataset_id)
+            dataset = db.session.scalar(dataset_stmt)

             if not dataset:
                 return []
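The rewritten block drops the explicit `Session(db.engine)` context manager (and its now-unneeded import above) and reads through Flask-SQLAlchemy's scoped `db.session`, matching how the surrounding code already queries. A rough, self-contained sketch under that assumption (the model and data below are placeholders, not Dify's schema):

from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import select

app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
db = SQLAlchemy(app)


class Dataset(db.Model):
    id = db.Column(db.String, primary_key=True)
    name = db.Column(db.String)


with app.app_context():
    db.create_all()
    db.session.add(Dataset(id="ds-1", name="demo"))
    db.session.commit()

    # Reuse the app-scoped db.session instead of opening a separate
    # Session(db.engine); both return the same mapped row here.
    dataset = db.session.scalar(select(Dataset).where(Dataset.id == "ds-1"))
    print(dataset.name if dataset else None)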
@@ -987,7 +985,7 @@ class DatasetRetrieval:
             )

             # handle invoke result
-            result_text, usage = self._handle_invoke_result(invoke_result=invoke_result)
+            result_text, _ = self._handle_invoke_result(invoke_result=invoke_result)

             result_text_json = parse_and_check_json_markdown(result_text, [])
             automatic_metadata_filters = []
@@ -1002,7 +1000,7 @@ class DatasetRetrieval:
                             "condition": item.get("comparison_operator"),
                         }
                     )
-        except Exception as e:
+        except Exception:
             return None
         return automatic_metadata_filters

@@ -19,5 +19,5 @@ class StructuredChatOutputParser:
                 return ReactAction(response["action"], response.get("action_input", {}), text)
             else:
                 return ReactFinish({"output": text}, text)
-        except Exception as e:
+        except Exception:
             raise ValueError(f"Could not parse LLM output: {text}")
@@ -38,5 +38,5 @@ class FunctionCallMultiDatasetRouter:
                 # get retrieval model config
                 return result.message.tool_calls[0].function.name
             return None
-        except Exception as e:
+        except Exception:
             return None
@@ -77,7 +77,7 @@ class ReactMultiDatasetRouter:
                 user_id=user_id,
                 tenant_id=tenant_id,
             )
-        except Exception as e:
+        except Exception:
             return None

     def _react_invoke(
@@ -120,7 +120,7 @@ class ReactMultiDatasetRouter:
             memory=None,
             model_config=model_config,
         )
-        result_text, usage = self._invoke_llm(
+        result_text, _ = self._invoke_llm(
            completion_param=model_config.parameters,
            model_instance=model_instance,
            prompt_messages=prompt_messages,