fix(jira): prevent missed incremental updates after issue edits (#13674)

### What problem does this PR solve?

Fixes [#13505](https://github.com/infiniflow/ragflow/issues/13505): Jira
incremental sync could miss updated issues after the initial sync,
especially near time boundaries.

Root cause:
- Jira JQL uses minute-level precision for `updated` filters.
- Incremental windows had no overlap buffer, so boundary updates could
be skipped.
- Sync log cursor tracking used a backward-facing update for
`poll_range_start`.
- Existing-doc updates in `upload_document` lacked a KB ownership guard
for doc-id collisions.

What changed:
- Added Jira incremental overlap buffer (`time_buffer_seconds`,
defaulting to `JIRA_SYNC_TIME_BUFFER_SECONDS`) when building JQL
lower-bound time.
- Preserved second-level post-filtering to avoid duplicate reprocessing
while still catching boundary updates.
- Improved Jira sync logging to include start/end window and overlap
configuration.
- Updated sync cursor tracking in `increase_docs` to keep
`poll_range_start` moving forward with max update time.
- Added KB ID safety check before updating existing document records in
`upload_document`.

Verification performed:
- Python syntax compile checks passed for modified files.
- Manual verification flow:
  1. Run full Jira sync.
  2. Edit an already-indexed Jira issue.
  3. Run next incremental sync.
  4. Confirm updated content is re-ingested into KB.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
NeedmeFordev
2026-03-18 09:31:05 -06:00
committed by GitHub
parent dee68c571b
commit c3f79dbcb0
5 changed files with 183 additions and 8 deletions

View File

@ -20,6 +20,7 @@ from common.data_source.config import (
INDEX_BATCH_SIZE,
JIRA_CONNECTOR_LABELS_TO_SKIP,
JIRA_CONNECTOR_MAX_TICKET_SIZE,
JIRA_SYNC_TIME_BUFFER_SECONDS,
JIRA_TIMEZONE_OFFSET,
ONE_HOUR,
DocumentSource,
@ -95,6 +96,7 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
scoped_token: bool = False,
attachment_size_limit: int | None = None,
timezone_offset: float | None = None,
time_buffer_seconds: int | None = JIRA_SYNC_TIME_BUFFER_SECONDS,
) -> None:
if not jira_base_url:
raise ConnectorValidationError("Jira base URL must be provided.")
@ -120,6 +122,16 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
self.timezone_offset = tz_offset_value
self.timezone = timezone(offset=timedelta(hours=tz_offset_value))
self._timezone_overridden = timezone_offset is not None
if time_buffer_seconds is None:
buffer_value = JIRA_SYNC_TIME_BUFFER_SECONDS
else:
try:
buffer_value = int(time_buffer_seconds)
except (TypeError, ValueError) as exc:
raise ConnectorValidationError(
f"Invalid time_buffer_seconds value ({time_buffer_seconds!r}); expected an integer."
) from exc
self.time_buffer_seconds = max(0, buffer_value)
# -------------------------------------------------------------------------
# Connector lifecycle helpers
@ -245,7 +257,16 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
while True:
attempt += 1
jql = self._build_jql(attempt_start, end)
logger.info(f"[Jira] Executing Jira JQL attempt {attempt} (buffered_retry={retried_with_buffer})[start and end parameters redacted]")
adjusted_start = self._adjust_start_for_query(attempt_start)
logger.info(
"[Jira] Executing Jira JQL attempt %s (buffered_retry=%s, start=%s, adjusted_start=%s, end=%s, overlap_buffer_s=%s)",
attempt,
retried_with_buffer,
attempt_start,
adjusted_start,
end,
self.time_buffer_seconds,
)
try:
return (yield from self._load_from_checkpoint_internal(jql, checkpoint, start_filter=start))
except Exception as exc:
@ -424,8 +445,9 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
labels = ", ".join(f'"{label}"' for label in self.labels_to_skip)
clauses.append(f"labels NOT IN ({labels})")
if start is not None:
clauses.append(f'updated >= "{self._format_jql_time(start)}"')
adjusted_start = self._adjust_start_for_query(start)
if adjusted_start is not None:
clauses.append(f'updated >= "{self._format_jql_time(adjusted_start)}"')
if end is not None:
clauses.append(f'updated <= "{self._format_jql_time(end)}"')
@ -437,6 +459,17 @@ class JiraConnector(CheckpointedConnectorWithPermSync, SlimConnectorWithPermSync
jql = f"{jql} ORDER BY updated ASC"
return jql
def _adjust_start_for_query(self, start: SecondsSinceUnixEpoch | None) -> SecondsSinceUnixEpoch | None:
    """Shift the query lower bound back by ``self.time_buffer_seconds``.

    The overlap guards against updates being missed at minute-precision
    JQL boundaries.

    Args:
        start: Lower bound of the sync window in seconds since the Unix
            epoch, or ``None`` when the window has no lower bound.

    Returns:
        ``None`` when ``start`` is ``None``. Otherwise ``start`` as a float,
        unchanged if it is non-positive or the configured buffer is
        non-positive; else ``start`` minus the buffer, clamped to ``0.0``.
    """
    if start is None:
        return None

    lower_bound = float(start)
    # Pass-through cases: nothing sensible to subtract from, or no overlap
    # configured. The buffer attribute is only consulted for positive starts.
    if lower_bound <= 0 or self.time_buffer_seconds <= 0:
        return lower_bound

    overlap = float(self.time_buffer_seconds)
    # Never let the buffered bound fall before the epoch.
    return max(0.0, lower_bound - overlap)
def _format_jql_time(self, timestamp: SecondsSinceUnixEpoch) -> str:
dt_utc = datetime.fromtimestamp(float(timestamp), tz=timezone.utc)
dt_local = dt_utc.astimezone(self.timezone)