Files
ragflow/test/unit_test/api/utils/test_api_file_utils.py
Liu An 7715bad04e refactor: reorganize unit test files into appropriate directories (#13343)
### What problem does this PR solve?

Move test files from utils/ to their corresponding functional
directories:
- api/db/ for database related tests
- api/utils/ for API utility tests
- rag/utils/ for RAG utility tests

### Type of change

- [x] Refactoring
2026-03-04 11:02:56 +08:00

153 lines
4.8 KiB
Python

#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Unit tests for api.utils.file_utils (filename_type, thumbnail_img, sanitize_path, read_potential_broken_pdf)."""
import pytest
from api.db import FileType
from api.utils.file_utils import (
MAX_BLOB_SIZE_PDF,
MAX_BLOB_SIZE_THUMBNAIL,
GHOSTSCRIPT_TIMEOUT_SEC,
filename_type,
thumbnail_img,
thumbnail,
sanitize_path,
read_potential_broken_pdf,
repair_pdf_with_ghostscript,
)
class TestFilenameType:
    """Checks filename_type against recognised names and malformed input."""

    @pytest.mark.parametrize(
        "filename,expected",
        (
            ("doc.pdf", FileType.PDF.value),
            ("a.PDF", FileType.PDF.value),
            ("x.png", FileType.VISUAL.value),
            ("file.docx", FileType.DOC.value),
            ("a/b/c.pdf", FileType.PDF.value),
            ("path/to/file.txt", FileType.DOC.value),
        ),
    )
    def test_valid_filenames(self, filename, expected):
        """Each listed name, bare or path-qualified, must map to its expected type value."""
        detected = filename_type(filename)
        assert detected == expected

    @pytest.mark.parametrize("filename", (None, "", " ", 123, []))
    def test_invalid_or_empty_returns_other(self, filename):
        """None, empty or blank strings and non-string values must be classified as OTHER."""
        detected = filename_type(filename)
        assert detected == FileType.OTHER.value

    def test_path_with_basename_uses_extension(self):
        """A nested path must be classified by the extension of its final component."""
        nested_path = "folder/subfolder/document.pdf"
        detected = filename_type(nested_path)
        assert detected == FileType.PDF.value
class TestSanitizePath:
    """Table-driven checks of sanitize_path normalisation."""

    @pytest.mark.parametrize(
        "raw,expected",
        (
            # Missing, blank, or non-string input is expected to collapse to "".
            (None, ""),
            ("", ""),
            (" ", ""),
            (42, ""),
            # Already-clean relative path is expected to pass through unchanged.
            ("a/b", "a/b"),
            # Parent-directory segment is expected to be dropped.
            ("a/../b", "a/b"),
            # Leading and trailing separators are expected to be trimmed.
            ("/leading/", "leading"),
            # Backslashes are expected to become forward slashes.
            ("\\mixed\\path", "mixed/path"),
        ),
    )
    def test_sanitize_cases(self, raw, expected):
        """sanitize_path(raw) must equal the expected normalised string."""
        cleaned = sanitize_path(raw)
        assert cleaned == expected
class TestReadPotentialBrokenPdf:
    """Degenerate-input checks for read_potential_broken_pdf."""

    def test_none_returns_empty_bytes(self):
        """A None blob must produce empty bytes."""
        outcome = read_potential_broken_pdf(None)
        assert outcome == b""

    def test_empty_bytes_returns_as_is(self):
        """An empty bytes blob must come back as empty bytes."""
        outcome = read_potential_broken_pdf(b"")
        assert outcome == b""

    def test_non_len_raises_or_returns_empty(self):
        """An object without __len__ must produce empty bytes.

        NOTE(review): the test name also mentions a "raises" outcome, but
        only the empty-bytes result is asserted here.
        """

        class NoLen:
            # Deliberately bare: instances do not support len().
            pass

        unsized = NoLen()
        outcome = read_potential_broken_pdf(unsized)
        assert outcome == b""
class TestThumbnailImg:
    """Inputs for which thumbnail_img is expected to return None."""

    def test_none_blob_returns_none(self):
        """A None blob must yield None."""
        rendered = thumbnail_img("x.pdf", None)
        assert rendered is None

    def test_none_filename_returns_none(self):
        """A None filename must yield None even when a blob is supplied."""
        rendered = thumbnail_img(None, b"fake pdf content")
        assert rendered is None

    def test_empty_blob_returns_none(self):
        """A zero-length blob must yield None."""
        rendered = thumbnail_img("x.pdf", b"")
        assert rendered is None

    def test_empty_filename_returns_none(self):
        """An empty filename must yield None."""
        rendered = thumbnail_img("", b"x")
        assert rendered is None

    def test_oversized_blob_returns_none(self):
        """A blob one byte over MAX_BLOB_SIZE_THUMBNAIL must yield None."""
        over_limit = MAX_BLOB_SIZE_THUMBNAIL + 1
        oversized_blob = b"x" * over_limit
        rendered = thumbnail_img("x.pdf", oversized_blob)
        assert rendered is None
class TestThumbnail:
    """Checks on the string returned by thumbnail()."""

    def test_none_img_returns_empty_string(self):
        """An unrecognised extension with junk content must give an empty string."""
        encoded = thumbnail("x.xyz", b"garbage")
        assert encoded == ""

    def test_valid_img_returns_base64_prefix(self):
        """A small PNG-like blob must give a prefixed string or an empty string.

        NOTE(review): the empty-string alternative means this test still
        passes when no thumbnail is produced at all; confirm that is the
        intended tolerance.
        """
        from api.constants import IMG_BASE64_PREFIX

        png_blob = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\x0cIDATx\x9cc\xf8\x0f\x00\x00\x01\x01\x00\x05\x18\xd8N\x00\x00\x00\x00IEND\xaeB`\x82"
        encoded = thumbnail("x.png", png_blob)
        assert encoded.startswith(IMG_BASE64_PREFIX) or encoded == ""
class TestRepairPdfWithGhostscript:
    """Degenerate and over-limit inputs for repair_pdf_with_ghostscript."""

    def test_none_returns_empty_bytes(self):
        """A None input must produce empty bytes."""
        repaired = repair_pdf_with_ghostscript(None)
        assert repaired == b""

    def test_empty_bytes_returns_empty(self):
        """An empty bytes input must produce empty bytes."""
        repaired = repair_pdf_with_ghostscript(b"")
        assert repaired == b""

    def test_oversized_returns_original_without_calling_gs(self):
        """Input one byte over MAX_BLOB_SIZE_PDF must come back equal to the input.

        NOTE(review): only the returned bytes are compared; nothing here
        verifies that Ghostscript was not invoked, despite the test name.
        """
        over_limit = MAX_BLOB_SIZE_PDF + 1
        oversized_pdf = b"%" * over_limit
        repaired = repair_pdf_with_ghostscript(oversized_pdf)
        assert repaired == oversized_pdf
class TestConstants:
    """Sanity checks that the imported resource-limit constants are strictly positive."""

    def test_thumbnail_limit_positive(self):
        """MAX_BLOB_SIZE_THUMBNAIL must be greater than zero."""
        limit = MAX_BLOB_SIZE_THUMBNAIL
        assert limit > 0

    def test_pdf_limit_positive(self):
        """MAX_BLOB_SIZE_PDF must be greater than zero."""
        limit = MAX_BLOB_SIZE_PDF
        assert limit > 0

    def test_gs_timeout_positive(self):
        """GHOSTSCRIPT_TIMEOUT_SEC must be greater than zero."""
        timeout = GHOSTSCRIPT_TIMEOUT_SEC
        assert timeout > 0