fix(jira): prevent missed incremental updates after issue edits (#13674)

### What problem does this PR solve?

Fixes [#13505](https://github.com/infiniflow/ragflow/issues/13505): Jira
incremental sync could miss updated issues after initial sync,
especially near time boundaries.

Root cause:
- Jira JQL uses minute-level precision for `updated` filters.
- Incremental windows had no overlap buffer, so boundary updates could
be skipped.
- Sync log cursor tracking used a backward-facing update for
`poll_range_start`, so the cursor was not guaranteed to advance to the
latest update time.
- Existing-doc updates in `upload_document` lacked a KB ownership guard
for doc-id collisions.

What changed:
- Added Jira incremental overlap buffer (`time_buffer_seconds`,
defaulting to `JIRA_SYNC_TIME_BUFFER_SECONDS`) when building JQL
lower-bound time.
- Preserved second-level post-filtering to avoid duplicate reprocessing
while still catching boundary updates.
- Improved Jira sync logging to include start/end window and overlap
configuration.
- Updated sync cursor tracking in `increase_docs` to keep
`poll_range_start` moving forward with max update time.
- Added KB ID safety check before updating existing document records in
`upload_document`.

Verification performed:
- Python syntax compile checks passed for modified files.
- Manual verification flow:
  1. Run full Jira sync.
  2. Edit an already-indexed Jira issue.
  3. Run next incremental sync.
  4. Confirm updated content is re-ingested into KB.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
NeedmeFordev
2026-03-18 09:31:05 -06:00
committed by GitHub
parent dee68c571b
commit c3f79dbcb0
5 changed files with 183 additions and 8 deletions

View File

@@ -0,0 +1,122 @@
#
# Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import importlib.util
import sys
import types
import warnings
from types import SimpleNamespace
import pytest
# Silence the UserWarning whose text starts with "pkg_resources is deprecated
# as an API" so that it does not clutter the output of this test module.
warnings.filterwarnings(
    action="ignore",
    message="pkg_resources is deprecated as an API.*",
    category=UserWarning,
)
def _install_cv2_stub_if_unavailable():
    """Register a minimal stand-in ``cv2`` module when OpenCV cannot be imported.

    If ``import cv2`` succeeds, nothing is changed. Otherwise a stub module is
    stored in ``sys.modules["cv2"]`` that:

    * exposes the four constants assigned explicitly below;
    * resolves any other all-uppercase attribute to ``0``;
    * resolves any other regular attribute to a callable that raises
      ``RuntimeError`` when invoked;
    * raises ``AttributeError`` for dunder names, so ``hasattr`` and
      ``getattr(..., default)`` probes for protocol attributes such as
      ``__path__``, ``__all__`` or ``__wrapped__`` behave as they would on a
      plain module instead of receiving a bogus callable.
    """
    try:
        importlib.import_module("cv2")
    except Exception:
        # Deliberately broad: any failure while importing cv2 is treated as
        # "unavailable" and falls through to installing the stub.
        pass
    else:
        return

    stub = types.ModuleType("cv2")
    stub.INTER_LINEAR = 1
    stub.INTER_CUBIC = 2
    stub.BORDER_CONSTANT = 0
    stub.BORDER_REPLICATE = 1

    def _missing(*_args, **_kwargs):
        raise RuntimeError("cv2 runtime call is unavailable in this test environment")

    def _module_getattr(name):
        # Module-level __getattr__ is only consulted after normal lookup fails.
        if name.startswith("__") and name.endswith("__"):
            # Dunder probes must fail normally; returning a callable here would
            # make the stub appear to have attributes it does not really have.
            raise AttributeError(f"module 'cv2' has no attribute {name!r}")
        if name.isupper():
            # Unknown constants resolve to a harmless placeholder value.
            return 0
        return _missing

    stub.__getattr__ = _module_getattr
    sys.modules["cv2"] = stub
def _install_xgboost_stub_if_unavailable():
    """Put an empty placeholder ``xgboost`` module in ``sys.modules`` if needed.

    Nothing happens when ``xgboost`` is already present in ``sys.modules`` or
    when ``importlib.util.find_spec`` can locate it; only otherwise is an empty
    ``types.ModuleType("xgboost")`` registered.
    """
    # The sys.modules check comes first so find_spec is skipped when the
    # module is already registered.
    needs_stub = "xgboost" not in sys.modules and importlib.util.find_spec("xgboost") is None
    if needs_stub:
        sys.modules["xgboost"] = types.ModuleType("xgboost")
# Install the optional-dependency stubs before importing the service module.
# This ordering is why the imports below are not at the top of the file and
# carry `noqa: E402`.
# NOTE(review): presumably importing api.db.services pulls in cv2/xgboost
# transitively — confirm.
_install_cv2_stub_if_unavailable()
_install_xgboost_stub_if_unavailable()
from api.db.services import file_service as file_service_module # noqa: E402
from api.db.services.file_service import FileService # noqa: E402
class _DummyUploadFile:
    """Minimal upload-file stand-in carrying only a filename and a document id.

    ``read()`` acts as a tripwire: it always raises ``AssertionError``, so a
    test using this object fails if the code under test tries to read the
    file content.
    """

    def __init__(self, filename, doc_id):
        """Store *filename* and expose *doc_id* as the ``id`` attribute."""
        self.filename = filename
        self.id = doc_id

    def read(self):
        """Always raise ``AssertionError``; the content must never be read."""
        failure = "read() should not be called for cross-KB collision path"
        raise AssertionError(failure)
def _unwrapped_upload_document():
    """Return the function stored behind ``FileService.upload_document``.

    ``__func__`` drops the binding to ``FileService`` and ``__wrapped__`` steps
    past one wrapper layer, so the caller has to pass the class explicitly as
    the first argument.

    NOTE(review): ``__wrapped__`` is presumably set by a ``functools.wraps``
    based decorator on ``upload_document`` — confirm against the service.
    """
    bound_upload = FileService.upload_document
    plain_function = bound_upload.__func__
    return plain_function.__wrapped__
@pytest.mark.p2
def test_upload_document_skips_cross_kb_document_id_collision(monkeypatch):
    """An upload whose id matches a document in another KB must be skipped.

    The folder helpers on ``FileService`` and ``DocumentService.get_by_id`` are
    replaced with stubs so that the lookup returns a document owned by
    ``kb-other`` while the upload targets ``kb-target``. The call is expected
    to return no files and exactly one error message for the file, without
    ever calling ``read()`` on the upload object.
    """
    expected_reason = "Existing document id collision with another knowledge base; skipping update."

    target_kb = SimpleNamespace(
        id="kb-target",
        tenant_id="tenant-1",
        name="Target KB",
        parser_id="default",
        pipeline_id=None,
        parser_config={},
    )
    # Same id as the incoming upload, but owned by a different knowledge base.
    foreign_doc = SimpleNamespace(
        id="doc-1",
        kb_id="kb-other",
        location="old-location.txt",
        content_hash="old-hash",
        to_dict=lambda: {"id": "doc-1"},
    )

    # Folder-related classmethods are replaced with canned return values.
    classmethod_stubs = {
        "get_root_folder": lambda cls, _uid: {"id": "root"},
        "init_knowledgebase_docs": lambda cls, _pf_id, _uid: None,
        "get_kb_folder": lambda cls, _uid: {"id": "kb-root"},
        "new_a_file_from_kb": lambda cls, _tenant_id, _name, _parent_id: {"id": "kb-folder"},
    }
    for attr_name, stub in classmethod_stubs.items():
        monkeypatch.setattr(FileService, attr_name, classmethod(stub))
    monkeypatch.setattr(
        file_service_module.DocumentService,
        "get_by_id",
        lambda _doc_id: (True, foreign_doc),
    )

    upload_document = _unwrapped_upload_document()
    colliding_upload = _DummyUploadFile(filename="collision.txt", doc_id="doc-1")
    err, files = upload_document(FileService, target_kb, [colliding_upload], "user-1")

    assert files == []
    assert len(err) == 1
    message = err[0]
    assert message.startswith("collision.txt: ")
    assert expected_reason in message