mirror of
https://github.com/langgenius/dify.git
synced 2026-05-03 08:58:09 +08:00
Merge branch 'main' into feat/end-user-oauth
This commit is contained in:
@ -7,7 +7,7 @@ import tempfile
|
||||
from collections.abc import Mapping, Sequence
|
||||
from typing import Any
|
||||
|
||||
import chardet
|
||||
import charset_normalizer
|
||||
import docx
|
||||
import pandas as pd
|
||||
import pypandoc
|
||||
@ -228,9 +228,12 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str)
|
||||
|
||||
def _extract_text_from_plain_text(file_content: bytes) -> str:
|
||||
try:
|
||||
# Detect encoding using chardet
|
||||
result = chardet.detect(file_content)
|
||||
encoding = result["encoding"]
|
||||
# Detect encoding using charset_normalizer
|
||||
result = charset_normalizer.from_bytes(file_content, cp_isolation=["utf_8", "latin_1", "cp1252"]).best()
|
||||
if result:
|
||||
encoding = result.encoding
|
||||
else:
|
||||
encoding = "utf-8"
|
||||
|
||||
# Fallback to utf-8 if detection fails
|
||||
if not encoding:
|
||||
@ -247,9 +250,12 @@ def _extract_text_from_plain_text(file_content: bytes) -> str:
|
||||
|
||||
def _extract_text_from_json(file_content: bytes) -> str:
|
||||
try:
|
||||
# Detect encoding using chardet
|
||||
result = chardet.detect(file_content)
|
||||
encoding = result["encoding"]
|
||||
# Detect encoding using charset_normalizer
|
||||
result = charset_normalizer.from_bytes(file_content).best()
|
||||
if result:
|
||||
encoding = result.encoding
|
||||
else:
|
||||
encoding = "utf-8"
|
||||
|
||||
# Fallback to utf-8 if detection fails
|
||||
if not encoding:
|
||||
@ -269,9 +275,12 @@ def _extract_text_from_json(file_content: bytes) -> str:
|
||||
def _extract_text_from_yaml(file_content: bytes) -> str:
|
||||
"""Extract the content from yaml file"""
|
||||
try:
|
||||
# Detect encoding using chardet
|
||||
result = chardet.detect(file_content)
|
||||
encoding = result["encoding"]
|
||||
# Detect encoding using charset_normalizer
|
||||
result = charset_normalizer.from_bytes(file_content).best()
|
||||
if result:
|
||||
encoding = result.encoding
|
||||
else:
|
||||
encoding = "utf-8"
|
||||
|
||||
# Fallback to utf-8 if detection fails
|
||||
if not encoding:
|
||||
@ -424,9 +433,12 @@ def _extract_text_from_file(file: File):
|
||||
|
||||
def _extract_text_from_csv(file_content: bytes) -> str:
|
||||
try:
|
||||
# Detect encoding using chardet
|
||||
result = chardet.detect(file_content)
|
||||
encoding = result["encoding"]
|
||||
# Detect encoding using charset_normalizer
|
||||
result = charset_normalizer.from_bytes(file_content).best()
|
||||
if result:
|
||||
encoding = result.encoding
|
||||
else:
|
||||
encoding = "utf-8"
|
||||
|
||||
# Fallback to utf-8 if detection fails
|
||||
if not encoding:
|
||||
|
||||
@ -64,7 +64,10 @@ class DifyNodeFactory(NodeFactory):
|
||||
if not node_mapping:
|
||||
raise ValueError(f"No class mapping found for node type: {node_type}")
|
||||
|
||||
node_class = node_mapping.get(LATEST_VERSION)
|
||||
latest_node_class = node_mapping.get(LATEST_VERSION)
|
||||
node_version = str(node_data.get("version", "1"))
|
||||
matched_node_class = node_mapping.get(node_version)
|
||||
node_class = matched_node_class or latest_node_class
|
||||
if not node_class:
|
||||
raise ValueError(f"No latest version class found for node type: {node_type}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user