diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py index 229017696..d52d1e733 100644 --- a/api/apps/chunk_app.py +++ b/api/apps/chunk_app.py @@ -23,6 +23,7 @@ from quart import request from api.db.services.document_service import DocumentService from api.db.services.doc_metadata_service import DocMetadataService +from api.utils.image_utils import store_chunk_image from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.llm_service import LLMBundle from common.metadata_utils import apply_meta_data_filter @@ -318,6 +319,7 @@ async def create(): d["create_timestamp_flt"] = datetime.datetime.now().timestamp() if "tag_feas" in req: d["tag_feas"] = req["tag_feas"] + image_base64 = req.get("image_base64", None) try: def _log_response(resp, code, message): @@ -365,14 +367,21 @@ async def create(): embd_model_config = get_tenant_default_model_by_type(tenant_id, LLMType.EMBEDDING) embd_mdl = LLMBundle(tenant_id, embd_model_config) + if image_base64: + d["img_id"] = "{}-{}".format(doc.kb_id, chunck_id) + d["doc_type_kwd"] = "image" + v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d["question_kwd"] else "\n".join(d["question_kwd"])]) v = 0.1 * v[0] + 0.9 * v[1] d["q_%d_vec" % len(v)] = v.tolist() settings.docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id) + if image_base64: + store_chunk_image(doc.kb_id, chunck_id, base64.b64decode(image_base64)) + DocumentService.increment_chunk_num( doc.id, doc.kb_id, c, 1, 0) - resp = get_json_result(data={"chunk_id": chunck_id}) + resp = get_json_result(data={"chunk_id": chunck_id, "image_id": d.get("img_id", "")}) _log_response(resp, RetCode.SUCCESS, "success") return resp diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 2537ec4c0..eb71e41bc 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -48,6 +48,7 @@ from common.string_utils import remove_redundant_spaces from common.misc_utils import thread_pool_exec from common.constants import 
RetCode, LLMType, ParserType, TaskStatus, FileSource from common import settings +from api.utils.image_utils import store_chunk_image MAXIMUM_OF_UPLOADING_FILES = 256 @@ -1190,6 +1191,9 @@ async def add_chunk(tenant_id, dataset_id, document_id): items: type: string description: Important keywords. + image_base64: + type: string + description: Base64-encoded image to associate with the chunk. - in: header name: Authorization type: string @@ -1254,6 +1258,12 @@ async def add_chunk(tenant_id, dataset_id, document_id): d["tag_kwd"] = req["tag_kwd"] if "tag_feas" in req: d["tag_feas"] = req["tag_feas"] + import base64 + image_base64 = req.get("image_base64", None) + if image_base64: + d["img_id"] = "{}-{}".format(dataset_id, chunk_id) + d["doc_type_kwd"] = "image" + tenant_embd_id = DocumentService.get_tenant_embd_id(document_id) if tenant_embd_id: model_config = get_model_config_by_id(tenant_embd_id) @@ -1266,6 +1276,9 @@ async def add_chunk(tenant_id, dataset_id, document_id): d["q_%d_vec" % len(v)] = v.tolist() settings.docStoreConn.insert([d], search.index_name(tenant_id), dataset_id) + if image_base64: + store_chunk_image(dataset_id, chunk_id, base64.b64decode(image_base64)) + DocumentService.increment_chunk_num(doc.id, doc.kb_id, c, 1, 0) # rename keys key_mapping = { @@ -1278,6 +1291,7 @@ async def add_chunk(tenant_id, dataset_id, document_id): "create_timestamp_flt": "create_timestamp", "create_time": "create_time", "document_keyword": "document", + "img_id": "image_id", } renamed_chunk = {} for key, value in d.items(): diff --git a/api/utils/image_utils.py b/api/utils/image_utils.py new file mode 100644 index 000000000..7b93aa492 --- /dev/null +++ b/api/utils/image_utils.py @@ -0,0 +1,40 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from io import BytesIO + +from PIL import Image + +from common import settings + + +def store_chunk_image(bucket, name, image_binary): + if settings.STORAGE_IMPL.obj_exist(bucket, name): + old_binary = settings.STORAGE_IMPL.get(bucket, name) + old_img = Image.open(BytesIO(old_binary)) + new_img = Image.open(BytesIO(image_binary)) + old_img = old_img.convert("RGB") + new_img = new_img.convert("RGB") + width = max(old_img.width, new_img.width) + height = old_img.height + new_img.height + combined = Image.new("RGB", (width, height), (255, 255, 255)) + combined.paste(old_img, (0, 0)) + combined.paste(new_img, (0, old_img.height)) + buf = BytesIO() + combined.save(buf, format="JPEG") + settings.STORAGE_IMPL.put(bucket, name, buf.getvalue()) + else: + settings.STORAGE_IMPL.put(bucket, name, image_binary) diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index 0ba84c930..cfefe5afa 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -2005,6 +2005,7 @@ Adds a chunk to a specified document in a specified dataset. - Body: - `"content"`: `string` - `"important_keywords"`: `list[string]` + - `"image_base64"`: `string` ##### Request example @@ -2015,22 +2016,25 @@ curl --request POST \ --header 'Authorization: Bearer ' \ --data ' { - "content": "" + "content": "", + "image_base64": "" }' ``` ##### Request parameters -- `dataset_id`: (*Path parameter*) +- `dataset_id`: (*Path parameter*) The associated dataset ID. 
-- `document_ids`: (*Path parameter*) +- `document_ids`: (*Path parameter*) The associated document ID. -- `"content"`: (*Body parameter*), `string`, *Required* +- `"content"`: (*Body parameter*), `string`, *Required* The text content of the chunk. -- `"important_keywords`(*Body parameter*), `list[string]` +- `"important_keywords"` (*Body parameter*), `list[string]` The key terms or phrases to tag with the chunk. - `"questions"`(*Body parameter*), `list[string]` If there is a given question, the embedded chunks will be based on them +- `"image_base64"`: (*Body parameter*), `string` + A base64-encoded image to associate with the chunk. If the chunk already has an image, the new image will be vertically concatenated below the existing one. #### Response @@ -2047,6 +2051,7 @@ Success: "dataset_id": "72f36e1ebdf411efb7250242ac120006", "document_id": "61d68474be0111ef98dd0242ac120006", "id": "12ccdc56e59837e5", + "image_id": "", "important_keywords": [], "questions": [] } diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md index f3a87d397..cd24cf252 100644 --- a/docs/references/python_api_reference.md +++ b/docs/references/python_api_reference.md @@ -855,7 +855,7 @@ print("Async bulk parsing cancelled.") ### Add chunk ```python -Document.add_chunk(content:str, important_keywords:list[str] = []) -> Chunk +Document.add_chunk(content:str, important_keywords:list[str] = [], image_base64: str | None = None) -> Chunk ``` Adds a chunk to the current document. @@ -870,6 +870,10 @@ The text content of the chunk. The key terms or phrases to tag with the chunk. +##### image_base64: `str` + +A base64-encoded image to associate with the chunk. If the chunk already has an image, the new image will be vertically concatenated below the existing one. + #### Returns - Success: A `Chunk` object. @@ -880,6 +884,7 @@ A `Chunk` object contains the following attributes: - `id`: `str`: The chunk ID. - `content`: `str` The text content of the chunk. 
- `important_keywords`: `list[str]` A list of key terms or phrases tagged with the chunk. +- `image_id`: `str` The image ID associated with the chunk (empty string if no image). - `create_time`: `str` The time when the chunk was created (added to the document). - `create_timestamp`: `float` The timestamp representing the creation time of the chunk, expressed in seconds since January 1, 1970. - `dataset_id`: `str` The ID of the associated dataset. @@ -902,6 +907,16 @@ doc = doc[0] chunk = doc.add_chunk(content="xxxxxxx") ``` +Adding a chunk with an image: + +```python +import base64 + +with open("image.jpg", "rb") as f: + img_b64 = base64.b64encode(f.read()).decode() +chunk = doc.add_chunk(content="description of image", image_base64=img_b64) +``` + --- ### List chunks diff --git a/sdk/python/ragflow_sdk/modules/document.py b/sdk/python/ragflow_sdk/modules/document.py index ebbf553c8..e410fa9cb 100644 --- a/sdk/python/ragflow_sdk/modules/document.py +++ b/sdk/python/ragflow_sdk/modules/document.py @@ -87,8 +87,11 @@ class Document(Base): return chunks raise Exception(res.get("message")) - def add_chunk(self, content: str, important_keywords: list[str] = [], questions: list[str] = []): - res = self.post(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", {"content": content, "important_keywords": important_keywords, "questions": questions}) + def add_chunk(self, content: str, important_keywords: list[str] = [], questions: list[str] = [], image_base64: str | None = None): + body = {"content": content, "important_keywords": important_keywords, "questions": questions} + if image_base64 is not None: + body["image_base64"] = image_base64 + res = self.post(f"/datasets/{self.dataset_id}/documents/{self.id}/chunks", body) res = res.json() if res.get("code") == 0: return Chunk(self.rag, res["data"].get("chunk"))