Merge branch 'main' into fix/chore-fix

This commit is contained in:
Yeuoly
2024-12-04 15:34:39 +08:00
288 changed files with 8067 additions and 1950 deletions

View File

@@ -110,8 +110,12 @@ class RetrievalService:
str(dataset.tenant_id), reranking_mode, reranking_model, weights, False
)
all_documents = data_post_processor.invoke(
query=query, documents=all_documents, score_threshold=score_threshold, top_n=top_k
query=query,
documents=all_documents,
score_threshold=score_threshold,
top_n=top_k,
)
return all_documents
@classmethod
@@ -178,7 +182,10 @@ class RetrievalService:
)
all_documents.extend(
data_post_processor.invoke(
query=query, documents=documents, score_threshold=score_threshold, top_n=len(documents)
query=query,
documents=documents,
score_threshold=score_threshold,
top_n=len(documents),
)
)
else:
@@ -220,7 +227,10 @@ class RetrievalService:
)
all_documents.extend(
data_post_processor.invoke(
query=query, documents=documents, score_threshold=score_threshold, top_n=len(documents)
query=query,
documents=documents,
score_threshold=score_threshold,
top_n=len(documents),
)
)
else:

View File

@@ -104,8 +104,7 @@ class OceanBaseVector(BaseVector):
val = int(row[6])
vals.append(val)
if len(vals) == 0:
print("ob_vector_memory_limit_percentage not found in parameters.")
exit(1)
raise ValueError("ob_vector_memory_limit_percentage not found in parameters.")
if any(val == 0 for val in vals):
try:
self._client.perform_raw_text_sql("ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30")
@@ -200,10 +199,10 @@ class OceanBaseVectorFactory(AbstractVectorFactory):
return OceanBaseVector(
collection_name,
OceanBaseVectorConfig(
host=dify_config.OCEANBASE_VECTOR_HOST,
port=dify_config.OCEANBASE_VECTOR_PORT,
user=dify_config.OCEANBASE_VECTOR_USER,
host=dify_config.OCEANBASE_VECTOR_HOST or "",
port=dify_config.OCEANBASE_VECTOR_PORT or 0,
user=dify_config.OCEANBASE_VECTOR_USER or "",
password=(dify_config.OCEANBASE_VECTOR_PASSWORD or ""),
database=dify_config.OCEANBASE_VECTOR_DATABASE,
database=dify_config.OCEANBASE_VECTOR_DATABASE or "",
),
)

View File

@@ -230,7 +230,6 @@ class OracleVector(BaseVector):
except LookupError:
nltk.download("punkt")
nltk.download("stopwords")
print("run download")
e_str = re.sub(r"[^\w ]", "", query)
all_tokens = nltk.word_tokenize(e_str)
stop_words = stopwords.words("english")

View File

@@ -64,7 +64,7 @@ class UpstashVector(BaseVector):
item_ids = []
for doc_id in ids:
ids = self.get_ids_by_metadata_field("doc_id", doc_id)
if id:
if ids:
item_ids += ids
self._delete_by_ids(ids=item_ids)
@@ -95,9 +95,10 @@ class UpstashVector(BaseVector):
metadata = record.metadata
text = record.data
score = record.score
metadata["score"] = score
if score > score_threshold:
docs.append(Document(page_content=text, metadata=metadata))
if metadata is not None and text is not None:
metadata["score"] = score
if score > score_threshold:
docs.append(Document(page_content=text, metadata=metadata))
return docs
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
@@ -123,7 +124,7 @@ class UpstashVectorFactory(AbstractVectorFactory):
return UpstashVector(
collection_name=collection_name,
config=UpstashVectorConfig(
url=dify_config.UPSTASH_VECTOR_URL,
token=dify_config.UPSTASH_VECTOR_TOKEN,
url=dify_config.UPSTASH_VECTOR_URL or "",
token=dify_config.UPSTASH_VECTOR_TOKEN or "",
),
)