Feat: support context window for docx (#12455)

### What problem does this PR solve?

Feat: support context window for docx. Table and image chunks now carry `context_above`/`context_below` text taken from neighboring text chunks, within a configurable token budget.

#12303

Done:
- [x] naive.py
- [x] one.py

TODO:
- [ ] book.py
- [ ] manual.py

Fix: incorrect image position
Fix: incorrect chunk type tag
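
For reviewers, a minimal sketch of how the new knobs are meant to be used (the parser_config key names below are assumptions; only the `table_context_size` / `image_context_size` variables appear in this diff):

```python
# Hypothetical parser_config (key names assumed, not confirmed by this diff):
# a token budget of neighboring text is attached to each table/image chunk
# as context_above / context_below.
parser_config = {
    "chunk_token_num": 128,
    "delimiter": "\n!?。;!?",
    "table_context_size": 64,   # tokens of surrounding text per table chunk
    "image_context_size": 64,   # tokens of surrounding text per image chunk
}
```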

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
Magicbook1108
2026-01-07 15:08:17 +08:00
committed by GitHub
parent a442c9cac6
commit 011bbe9556
7 changed files with 397 additions and 120 deletions

View File

@ -529,6 +529,7 @@ def cancel_all_task_of(doc_id):
def has_canceled(task_id):
try:
if REDIS_CONN.get(f"{task_id}-cancel"):
logging.info(f"Task: {task_id} has been canceled")
return True
except Exception as e:
logging.exception(e)
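
For context, a sketch of the producer side this check pairs with; `cancel_task` below is illustrative (not in this diff), assuming REDIS_CONN exposes a redis-py-style `set`:

```python
def cancel_task(task_id: str):
    # Hypothetical counterpart (not in this diff): set the flag that
    # has_canceled() polls, so the running task stops at its next check.
    REDIS_CONN.set(f"{task_id}-cancel", "1")
```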

View File

@ -25,7 +25,7 @@ from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
from rag.prompts.generator import vision_llm_figure_describe_prompt, vision_llm_figure_describe_prompt_with_context
from rag.nlp import append_context2table_image4pdf
def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
if not figures_data_without_positions:
return []
@ -38,7 +38,6 @@ def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
if isinstance(figure_data[1], Image.Image)
]
def vision_figure_parser_docx_wrapper(sections, tbls, callback=None, **kwargs):
if not sections:
return tbls
@ -124,8 +123,56 @@ def vision_figure_parser_pdf_wrapper(tbls, callback=None, **kwargs):
return tbls
shared_executor = ThreadPoolExecutor(max_workers=10)
def vision_figure_parser_docx_wrapper_naive(chunks, idx_lst, callback=None, **kwargs):
if not chunks:
return []
try:
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
except Exception:
vision_model = None
logging.info("No vision model available; skip figure description enhancement.")
if vision_model:
@timeout(30, 3)
def worker(idx, ck):
context_above = ck.get("context_above", "")
context_below = ck.get("context_below", "")
if context_above or context_below:
prompt = vision_llm_figure_describe_prompt_with_context(
# context_above plus the caption text, if any
context_above=ck.get("context_above", "") + ck.get("text", ""),
context_below=ck.get("context_below", ""),
)
logging.info(f"[VisionFigureParser] figure={idx} context_above_len={len(context_above)} context_below_len={len(context_below)} prompt=with_context")
logging.info(f"[VisionFigureParser] figure={idx} context_above_snippet={context_above[:512]}")
logging.info(f"[VisionFigureParser] figure={idx} context_below_snippet={context_below[:512]}")
else:
prompt = vision_llm_figure_describe_prompt()
logging.info(f"[VisionFigureParser] figure={idx} context_len=0 prompt=default")
description_text = picture_vision_llm_chunk(
binary=ck.get("image"),
vision_model=vision_model,
prompt=prompt,
callback=callback,
)
return idx, description_text
with ThreadPoolExecutor(max_workers=10) as executor:
futures = [
executor.submit(worker, idx, chunks[idx])
for idx in idx_lst
]
for future in as_completed(futures):
idx, description = future.result()
chunks[idx]['text'] += description
shared_executor = ThreadPoolExecutor(max_workers=10)
class VisionFigureParser:
def __init__(self, vision_model, figures_data, *args, **kwargs):

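A minimal sketch of the chunk shape the new wrapper consumes, inferred from the fields read in `worker()`; the image, tenant id, and callback are placeholders:

```python
from PIL import Image

# The wrapper appends the vision model's description to chunks[idx]["text"]
# for every idx in idx_lst; context fields are optional.
chunks = [
    {"text": "Intro paragraph.", "image": None, "ck_type": "text"},
    {
        "text": "Figure 1: throughput.",      # caption, if present
        "image": Image.new("RGB", (8, 8)),    # placeholder figure
        "ck_type": "image",
        "context_above": "We benchmarked three configurations.",
        "context_below": "Throughput peaked on the third one.",
    },
]
vision_figure_parser_docx_wrapper_naive(
    chunks, idx_lst=[1], callback=lambda prog, msg: None, tenant_id="tenant-id"
)
```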
View File

@ -87,10 +87,18 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
callback(0.1, "Start to parse.")
doc_parser = naive.Docx()
# TODO: the table of contents needs to be removed
sections, tbls = doc_parser(
main_sections = doc_parser(
filename, binary=binary, from_page=from_page, to_page=to_page)
sections = []
tbls = []
for text, image, html in main_sections:
sections.append((text, image))
tbls.append(((None, html), ""))
remove_contents_table(sections, eng=is_english(
random_choices([t for t, _ in sections], k=200)))
tbls = vision_figure_parser_docx_wrapper(sections=sections, tbls=tbls, callback=callback, **kwargs)
# tbls = [((None, lns), None) for lns in tbls]
sections = [(item[0], item[1] if item[1] is not None else "") for item in sections if

View File

@ -23,6 +23,8 @@ from timeit import default_timer as timer
from docx import Document
from docx.image.exceptions import InvalidImageStreamError, UnexpectedEndOfFileError, UnrecognizedImageError
from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
from docx.table import Table as DocxTable
from docx.text.paragraph import Paragraph
from docx.opc.oxml import parse_xml
from markdown import markdown
from PIL import Image
@ -33,15 +35,15 @@ from api.db.services.llm_service import LLMBundle
from rag.utils.file_utils import extract_embed_file, extract_links_from_pdf, extract_links_from_docx, extract_html
from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownElementExtractor, MarkdownParser, \
PdfParser, TxtParser
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_docx_wrapper, \
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_docx_wrapper_naive, \
vision_figure_parser_pdf_wrapper
from deepdoc.parser.pdf_parser import PlainParser, VisionParser
from deepdoc.parser.docling_parser import DoclingParser
from deepdoc.parser.tcadp_parser import TCADPParser
from common.parser_config_utils import normalize_layout_recognizer
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, \
tokenize_chunks, tokenize_chunks_with_images, tokenize_table, attach_media_context, append_context2table_image4pdf
tokenize_chunks, doc_tokenize_chunks_with_images, tokenize_table, append_context2table_image4pdf, tokenize_chunks_with_images, \
attach_media_context # noqa: F401
def by_deepdoc(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls=None,
**kwargs):
@ -343,67 +345,116 @@ class Docx(DocxParser):
pn = 0
lines = []
last_image = None
for p in self.doc.paragraphs:
table_idx = 0
def flush_last_image():
nonlocal last_image, lines
if last_image is not None:
lines.append({"text": "", "image": last_image, "table": None, "style": "Image"})
last_image = None
for block in self.doc._element.body:
if pn > to_page:
break
if from_page <= pn < to_page:
if p.text.strip():
if p.style and p.style.name == 'Caption':
former_image = None
if lines and lines[-1][1] and lines[-1][2] != 'Caption':
former_image = lines[-1][1].pop()
elif last_image:
former_image = last_image
last_image = None
lines.append((self.__clean(p.text), [former_image], p.style.name))
if block.tag.endswith('p'):
p = Paragraph(block, self.doc)
if from_page <= pn < to_page:
text = p.text.strip()
style_name = p.style.name if p.style else ""
if text:
if style_name == "Caption":
former_image = None
if lines and lines[-1].get("image") and lines[-1].get("style") != "Caption":
former_image = lines[-1].get("image")
lines.pop()
elif last_image is not None:
former_image = last_image
last_image = None
lines.append(
{
"text": self.__clean(text),
"image": former_image if former_image else None,
"table": None,
}
)
else:
flush_last_image()
lines.append(
{
"text": self.__clean(text),
"image": None,
"table": None,
}
)
current_image = self.get_picture(self.doc, p)
if current_image is not None:
lines.append(
{
"text": "",
"image": current_image,
"table": None,
}
)
else:
current_image = self.get_picture(self.doc, p)
image_list = [current_image]
if last_image:
image_list.insert(0, last_image)
last_image = None
lines.append((self.__clean(p.text), image_list, p.style.name if p.style else ""))
else:
if current_image := self.get_picture(self.doc, p):
if lines:
lines[-1][1].append(current_image)
else:
if current_image is not None:
last_image = current_image
for run in p.runs:
if 'lastRenderedPageBreak' in run._element.xml:
pn += 1
continue
if 'w:br' in run._element.xml and 'type="page"' in run._element.xml:
pn += 1
new_line = [(line[0], reduce(concat_img, line[1]) if line[1] else None) for line in lines]
tbls = []
for i, tb in enumerate(self.doc.tables):
title = self.__get_nearest_title(i, filename)
html = "<table>"
if title:
html += f"<caption>Table Location: {title}</caption>"
for r in tb.rows:
html += "<tr>"
i = 0
try:
while i < len(r.cells):
span = 1
c = r.cells[i]
for j in range(i + 1, len(r.cells)):
if c.text == r.cells[j].text:
span += 1
i = j
else:
break
i += 1
html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
except Exception as e:
logging.warning(f"Error parsing table, ignore: {e}")
html += "</tr>"
html += "</table>"
tbls.append(((None, html), ""))
return new_line, tbls
for run in p.runs:
xml = run._element.xml
if "lastRenderedPageBreak" in xml:
pn += 1
continue
if "w:br" in xml and 'type="page"' in xml:
pn += 1
elif block.tag.endswith('tbl'):
if pn < from_page or pn > to_page:
table_idx += 1
continue
flush_last_image()
tb = DocxTable(block, self.doc)
title = self.__get_nearest_title(table_idx, filename)
html = "<table>"
if title:
html += f"<caption>Table Location: {title}</caption>"
for r in tb.rows:
html += "<tr>"
col_idx = 0
try:
while col_idx < len(r.cells):
span = 1
c = r.cells[col_idx]
for j in range(col_idx + 1, len(r.cells)):
if c.text == r.cells[j].text:
span += 1
col_idx = j
else:
break
col_idx += 1
html += f"<td>{c.text}</td>" if span == 1 else f"<td colspan='{span}'>{c.text}</td>"
except Exception as e:
logging.warning(f"Error parsing table, ignore: {e}")
html += "</tr>"
html += "</table>"
lines.append({"text": "", "image": None, "table": html})
table_idx += 1
flush_last_image()
new_line = [(line.get("text"), line.get("image"), line.get("table")) for line in lines]
return new_line
def to_markdown(self, filename=None, binary=None, inline_images: bool = True):
"""
@ -727,26 +778,26 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
# fix "There is no item named 'word/NULL' in the archive", referring to https://github.com/python-openxml/python-docx/issues/1105#issuecomment-1298075246
_SerializedRelationships.load_from_xml = load_from_xml_v2
sections, tables = Docx()(filename, binary)
tables = vision_figure_parser_docx_wrapper(sections=sections, tbls=tables, callback=callback, **kwargs)
res = tokenize_table(tables, doc, is_english)
callback(0.8, "Finish parsing.")
st = timer()
# each section is a (text, image, table_html) triple
sections = Docx()(filename, binary)
# chunks: list[dict]
# images: indexes of the image chunks within chunks
chunks, images = naive_merge_docx(
sections, int(parser_config.get(
"chunk_token_num", 128)), parser_config.get(
"delimiter", "\n!?。;!?"))
"delimiter", "\n!?。;!?"), table_context_size, image_context_size)
vision_figure_parser_docx_wrapper_naive(chunks=chunks, idx_lst=images, callback=callback, **kwargs)
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, images, child_delimiters_pattern=child_deli))
callback(0.8, "Finish parsing.")
st = timer()
res.extend(doc_tokenize_chunks_with_images(chunks, doc, is_english, child_delimiters_pattern=child_deli))
logging.info("naive_merge({}): {}".format(filename, timer() - st))
res.extend(embed_res)
res.extend(url_res)
if table_context_size or image_context_size:
attach_media_context(res, table_context_size, image_context_size)
return res
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
@ -1012,7 +1063,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
res.extend(embed_res)
if url_res:
res.extend(url_res)
#if table_context_size or image_context_size:
# if table_context_size or image_context_size:
# attach_media_context(res, table_context_size, image_context_size)
return res

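Putting the pieces of this file together, a sketch of the new .docx flow (filename and context sizes are placeholders):

```python
# Illustrative flow: Docx() now yields (text, image, table_html) triples;
# tables carry html, images carry a PIL.Image, plain text carries neither.
sections = Docx()("report.docx")
chunks, image_idxs = naive_merge_docx(
    sections,
    chunk_token_num=128,
    delimiter="\n!?。;!?",
    table_context_size=64,   # 0 disables context attachment
    image_context_size=64,
)
# image_idxs indexes the image chunks in `chunks`, ready for the vision wrapper.
```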
View File

@ -22,7 +22,7 @@ from deepdoc.parser.utils import get_text
from rag.app import naive
from rag.nlp import rag_tokenizer, tokenize
from deepdoc.parser import PdfParser, ExcelParser, HtmlParser
from deepdoc.parser.figure_parser import vision_figure_parser_docx_wrapper
from deepdoc.parser.figure_parser import vision_figure_parser_docx_wrapper_naive
from rag.app.naive import by_plaintext, PARSERS
from common.parser_config_utils import normalize_layout_recognizer
@ -76,11 +76,26 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
if re.search(r"\.docx$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
sections, tbls = naive.Docx()(filename, binary)
tbls = vision_figure_parser_docx_wrapper(sections=sections, tbls=tbls, callback=callback, **kwargs)
sections = [s for s, _ in sections if s]
for (_, html), _ in tbls:
sections.append(html)
sections = naive.Docx()(filename, binary)
cks = []
image_idxs = []
for text, image, table in sections:
if table is not None:
text = (text or "") + str(table)
ck_type = "table"
else:
ck_type = "image" if image is not None else "text"
if ck_type == "image":
image_idxs.append(len(cks))
cks.append({"text": text, "image": image, "ck_type": ck_type})
vision_figure_parser_docx_wrapper_naive(cks, image_idxs, callback, **kwargs)
sections = [ck["text"] for ck in cks if ck.get("text")]
callback(0.8, "Finish parsing.")
elif re.search(r"\.pdf$", filename, re.IGNORECASE):

View File

@ -316,6 +316,32 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None, child_delimiters_pattern=
return res
def doc_tokenize_chunks_with_images(chunks, doc, eng, child_delimiters_pattern=None, batch_size=10):
res = []
for ii, ck in enumerate(chunks):
text = ck.get('context_above', "") + ck.get('text', "") + ck.get('context_below', "")
if len(text.strip()) == 0:
continue
logging.debug("-- {}".format(ck))
d = copy.deepcopy(doc)
if ck.get("image"):
d["image"] = ck.get("image")
add_positions(d, [[ii] * 5])
if ck.get("ck_type") == "text":
if child_delimiters_pattern:
d["mom_with_weight"] = ck
res.extend(split_with_pattern(d, child_delimiters_pattern, text, eng))
continue
elif ck.get("ck_type") == "image":
d["doc_type_kwd"] = "image"
elif ck.get("ck_type") == "table":
d["doc_type_kwd"] = "table"
tokenize(d, text, eng)
res.append(d)
return res
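A minimal usage sketch for `doc_tokenize_chunks_with_images`; the base `doc` fields are placeholders:

```python
# Each chunk's text is expanded with its context window, then tagged by
# ck_type: "table" and "image" chunks get doc_type_kwd, "text" chunks may
# be split into children when a child delimiter pattern is given.
doc = {"docnm_kwd": "report.docx"}
chunks = [
    {"text": "Plain prose paragraph.", "ck_type": "text"},
    {"text": "<table><tr><td>v</td></tr></table>", "ck_type": "table"},
    {"text": "Figure description.", "ck_type": "image",
     "context_above": "See the setup above.", "context_below": ""},
]
res = doc_tokenize_chunks_with_images(chunks, doc, eng=True)
```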
def tokenize_chunks_with_images(chunks, doc, eng, images, child_delimiters_pattern=None):
res = []
# wrap up as es documents
@ -789,6 +815,11 @@ def append_context2table_image4pdf(sections: list, tabls: list, table_context_si
if len(contexts) < len(res) + 1:
contexts.append(("", ""))
res.append(((img, tb), poss))
return contexts if return_context else res
@ -1200,57 +1231,181 @@ def concat_img(img1, img2):
new_image.paste(img2, (0, height1))
return new_image
def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。;!?"):
if not sections:
return [], []
def _build_cks(sections, delimiter):
cks = []
tables = []
images = []
tk_nums = []
def add_chunk(t, image, pos=""):
nonlocal cks, images, tk_nums
tnum = num_tokens_from_string(t)
if tnum < 8:
pos = ""
if not cks or tk_nums[-1] > chunk_token_num:
# new chunk
if pos and t.find(pos) < 0:
t += pos
cks.append(t)
images.append(image)
tk_nums.append(tnum)
else:
# add to last chunk
if pos and cks[-1].find(pos) < 0:
t += pos
cks[-1] += t
images[-1] = concat_img(images[-1], image)
tk_nums[-1] += tnum
custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)]
has_custom = bool(custom_delimiters)
if has_custom:
custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True))
cks, images, tk_nums = [], [], []
custom_pattern = "|".join(
re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True)
)
pattern = r"(%s)" % custom_pattern
for sec, image in sections:
split_sec = re.split(pattern, sec)
for text, image, table in sections:
# normalize text
if not text:
text = "\n"
else:
text = "\n" + str(text)
if table:
# table ck
ck_text = text + str(table)
idx = len(cks)
cks.append({"text": ck_text, "image": image, "ck_type": "table", "tk_nums": num_tokens_from_string(ck_text)})
tables.append(idx)
continue
if image:
# image ck; keep any caption text as-is for the downstream description step
idx = len(cks)
cks.append({"text": text, "image": image, "ck_type": "image", "tk_nums": num_tokens_from_string(text)})
images.append(idx)
continue
# pure text ck(s)
if has_custom:
split_sec = re.split(pattern, text)
for sub_sec in split_sec:
if not sub_sec or re.fullmatch(custom_pattern, sub_sec):
continue
text_seg = "\n" + sub_sec
cks.append(text_seg)
images.append(image)
tk_nums.append(num_tokens_from_string(text_seg))
return cks, images
seg = "\n" + sub_sec if not sub_sec.startswith("\n") else sub_sec
cks.append({"text": seg, "image": None, "ck_type": "text", "tk_nums": num_tokens_from_string(seg)})
else:
cks.append({"text": text, "image": None, "ck_type": "text", "tk_nums": num_tokens_from_string(text)})
for sec, image in sections:
add_chunk("\n" + sec, image, "")
return cks, tables, images
return cks, images
def _add_context(cks, idx, context_size):
if cks[idx]["ck_type"] not in ("image", "table"):
return
prev = idx - 1
after = idx + 1
remain_above = context_size
remain_below = context_size
cks[idx]["context_above"] = ""
cks[idx]["context_below"] = ""
split_pat = r"([。!?\n]|\. )"
def take_sentences_from_end(cnt, need_tokens):
txts = re.split(split_pat, cnt, flags=re.DOTALL)
sents = []
for j in range(0, len(txts), 2):
sents.append(txts[j] + (txts[j + 1] if j + 1 < len(txts) else ""))
acc = ""
for s in reversed(sents):
acc = s + acc
if num_tokens_from_string(acc) >= need_tokens:
break
return acc
def take_sentences_from_start(cnt, need_tokens):
txts = re.split(split_pat, cnt, flags=re.DOTALL)
acc = ""
for j in range(0, len(txts), 2):
acc += txts[j] + (txts[j + 1] if j + 1 < len(txts) else "")
if num_tokens_from_string(acc) >= need_tokens:
break
return acc
# above
parts_above = []
while prev >= 0 and remain_above > 0:
if cks[prev]["ck_type"] == "text":
tk = cks[prev]["tk_nums"]
if tk >= remain_above:
piece = take_sentences_from_end(cks[prev]["text"], remain_above)
parts_above.insert(0, piece)
remain_above = 0
break
else:
parts_above.insert(0, cks[prev]["text"])
remain_above -= tk
prev -= 1
# below
parts_below = []
while after < len(cks) and remain_below > 0:
if cks[after]["ck_type"] == "text":
tk = cks[after]["tk_nums"]
if tk >= remain_below:
piece = take_sentences_from_start(cks[after]["text"], remain_below)
parts_below.append(piece)
remain_below = 0
break
else:
parts_below.append(cks[after]["text"])
remain_below -= tk
after += 1
cks[idx]["context_above"] = "".join(parts_above) if parts_above else ""
cks[idx]["context_below"] = "".join(parts_below) if parts_below else ""
def _merge_cks(cks, chunk_token_num):
merged = []
image_idxs = []
prev_text_ck = -1
for i in range(len(cks)):
ck_type = cks[i]["ck_type"]
if ck_type != "text":
merged.append(cks[i])
if ck_type == "image":
image_idxs.append(len(merged) - 1)
continue
if prev_text_ck < 0 or merged[prev_text_ck]["tk_nums"] >= chunk_token_num:
merged.append(cks[i])
prev_text_ck = len(merged) - 1
continue
merged[prev_text_ck]["text"] = (merged[prev_text_ck].get("text") or "") + (cks[i].get("text") or "")
merged[prev_text_ck]["tk_nums"] = merged[prev_text_ck].get("tk_nums", 0) + cks[i].get("tk_nums", 0)
return merged, image_idxs
def naive_merge_docx(
sections,
chunk_token_num=128,
delimiter="\n。;!?",
table_context_size=0,
image_context_size=0,
):
if not sections:
return [], []
cks, tables, images = _build_cks(sections, delimiter)
if table_context_size > 0:
for i in tables:
_add_context(cks, i, table_context_size)
if image_context_size > 0:
for i in images:
_add_context(cks, i, image_context_size)
merged_cks, merged_image_idx = _merge_cks(cks, chunk_token_num)
return merged_cks, merged_image_idx
def extract_between(text: str, start_tag: str, end_tag: str) -> list[str]:

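A small worked example of `_build_cks` plus `_add_context`; the 32-token budget is arbitrary:

```python
from PIL import Image

# Sections follow the (text, image, table_html) triple shape used above.
sections = [
    ("First paragraph about the setup.", None, None),
    ("", Image.new("RGB", (8, 8)), None),              # an image section
    ("Second paragraph about the results.", None, None),
]
cks, tables, images = _build_cks(sections, delimiter="\n。;!?")
for i in images:
    _add_context(cks, i, context_size=32)
# The image chunk now has context_above (tail sentences of the preceding text
# chunk) and context_below (head sentences of the following one), each cut off
# once roughly 32 tokens have been accumulated.
```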
View File

@ -1127,7 +1127,7 @@ async def do_handle_task(task):
if has_canceled(task_id):
try:
exists = await asyncio.to_thread(
settings.docStoreConn.indexExist,
settings.docStoreConn.index_exist,
search.index_name(task_tenant_id),
task_dataset_id,
)