Merge remote-tracking branch 'origin/main' into feat/model-plugins-implementing

This commit is contained in:
yyh
2026-03-11 11:57:11 +08:00
10 changed files with 198 additions and 274 deletions

View File

@ -157,6 +157,7 @@ class PluginInstallTaskPluginStatus(BaseModel):
message: str = Field(description="The message of the install task.")
icon: str = Field(description="The icon of the plugin.")
labels: I18nObject = Field(description="The labels of the plugin.")
source: str | None = Field(default=None, description="The installation source of the plugin")
class PluginInstallTask(BasePluginEntity):

View File

@ -204,26 +204,61 @@ class WordExtractor(BaseExtractor):
return " ".join(unique_content)
def _parse_cell_paragraph(self, paragraph, image_map):
paragraph_content = []
for run in paragraph.runs:
if run.element.xpath(".//a:blip"):
for blip in run.element.xpath(".//a:blip"):
image_id = blip.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed")
if not image_id:
continue
rel = paragraph.part.rels.get(image_id)
if rel is None:
continue
# For external images, use image_id as key; for internal, use target_part
if rel.is_external:
if image_id in image_map:
paragraph_content.append(image_map[image_id])
else:
image_part = rel.target_part
if image_part in image_map:
paragraph_content.append(image_map[image_part])
else:
paragraph_content.append(run.text)
paragraph_content: list[str] = []
for child in paragraph._element:
tag = child.tag
if tag == qn("w:hyperlink"):
# Note: w:hyperlink elements may also use w:anchor for internal bookmarks.
# This extractor intentionally only converts external links (HTTP/mailto, etc.)
# that are backed by a relationship id (r:id) with rel.is_external == True.
# Hyperlinks without such an external rel (including anchor-only bookmarks)
# are left as plain text link_text.
r_id = child.get(qn("r:id"))
link_text_parts: list[str] = []
for run_elem in child.findall(qn("w:r")):
run = Run(run_elem, paragraph)
if run.text:
link_text_parts.append(run.text)
link_text = "".join(link_text_parts).strip()
if r_id:
try:
rel = paragraph.part.rels.get(r_id)
if rel:
target_ref = getattr(rel, "target_ref", None)
if target_ref:
parsed_target = urlparse(str(target_ref))
if rel.is_external or parsed_target.scheme in ("http", "https", "mailto"):
display_text = link_text or str(target_ref)
link_text = f"[{display_text}]({target_ref})"
except Exception:
logger.exception("Failed to resolve URL for hyperlink with r:id: %s", r_id)
if link_text:
paragraph_content.append(link_text)
elif tag == qn("w:r"):
run = Run(child, paragraph)
if run.element.xpath(".//a:blip"):
for blip in run.element.xpath(".//a:blip"):
image_id = blip.get(
"{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
)
if not image_id:
continue
rel = paragraph.part.rels.get(image_id)
if rel is None:
continue
if rel.is_external:
if image_id in image_map:
paragraph_content.append(image_map[image_id])
else:
image_part = rel.target_part
if image_part in image_map:
paragraph_content.append(image_map[image_part])
else:
if run.text:
paragraph_content.append(run.text)
return "".join(paragraph_content).strip()
def parse_docx(self, docx_path):

View File

@ -423,15 +423,6 @@ def test_table_to_markdown_and_parse_helpers(monkeypatch):
markdown = extractor._table_to_markdown(table, {})
assert markdown == "| H1 | H2 |\n| --- | --- |\n| A | B |"
class FakeRunElement:
def __init__(self, blips):
self._blips = blips
def xpath(self, pattern):
if pattern == ".//a:blip":
return self._blips
return []
class FakeBlip:
def __init__(self, image_id):
self.image_id = image_id
@ -439,11 +430,31 @@ def test_table_to_markdown_and_parse_helpers(monkeypatch):
def get(self, key):
return self.image_id
class FakeRunChild:
def __init__(self, blips, text=""):
self._blips = blips
self.text = text
self.tag = qn("w:r")
def xpath(self, pattern):
if pattern == ".//a:blip":
return self._blips
return []
class FakeRun:
def __init__(self, element, paragraph):
# Mirror the subset used by _parse_cell_paragraph
self.element = element
self.text = getattr(element, "text", "")
# Patch we.Run so our lightweight child objects work with the extractor
monkeypatch.setattr(we, "Run", FakeRun)
image_part = object()
paragraph = SimpleNamespace(
runs=[
SimpleNamespace(element=FakeRunElement([FakeBlip(None), FakeBlip("ext"), FakeBlip("int")]), text=""),
SimpleNamespace(element=FakeRunElement([]), text="plain"),
_element=[
FakeRunChild([FakeBlip(None), FakeBlip("ext"), FakeBlip("int")], text=""),
FakeRunChild([], text="plain"),
],
part=SimpleNamespace(
rels={
@ -452,6 +463,7 @@ def test_table_to_markdown_and_parse_helpers(monkeypatch):
}
),
)
image_map = {"ext": "EXT-IMG", image_part: "INT-IMG"}
assert extractor._parse_cell_paragraph(paragraph, image_map) == "EXT-IMGINT-IMGplain"
@ -625,3 +637,83 @@ def test_parse_docx_covers_drawing_shapes_hyperlink_error_and_table_branch(monke
assert "BrokenLink" in content
assert "TABLE-MARKDOWN" in content
logger_exception.assert_called_once()
def test_parse_cell_paragraph_hyperlink_in_table_cell_http():
    """An http hyperlink inside a table cell is extracted as a markdown link."""
    document = Document()
    cell_paragraph = document.add_table(rows=1, cols=1).cell(0, 0).paragraphs[0]
    # Hand-assemble a modern (relationship-backed) w:hyperlink element.
    rel_id = "rIdHttp1"
    link_elem = OxmlElement("w:hyperlink")
    link_elem.set(qn("r:id"), rel_id)
    run_node = OxmlElement("w:r")
    text_node = OxmlElement("w:t")
    text_node.text = "Dify"
    run_node.append(text_node)
    link_elem.append(run_node)
    cell_paragraph._p.append(link_elem)
    # Register the external relationship that the r:id above points at.
    document.part.rels.add_relationship(
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
        "https://dify.ai",
        rel_id,
        is_external=True,
    )
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as handle:
        document.save(handle.name)
        saved_path = handle.name
    try:
        reopened_cell = Document(saved_path).tables[0].cell(0, 0)
        extractor = object.__new__(WordExtractor)
        result = extractor._parse_cell_paragraph(reopened_cell.paragraphs[0], {})
        assert result == "[Dify](https://dify.ai)"
    finally:
        if os.path.exists(saved_path):
            os.remove(saved_path)
def test_parse_cell_paragraph_hyperlink_in_table_cell_mailto():
    """A mailto hyperlink inside a table cell is extracted as a markdown link."""
    document = Document()
    cell_paragraph = document.add_table(rows=1, cols=1).cell(0, 0).paragraphs[0]
    # Hand-assemble a modern (relationship-backed) w:hyperlink element.
    rel_id = "rIdMail1"
    link_elem = OxmlElement("w:hyperlink")
    link_elem.set(qn("r:id"), rel_id)
    run_node = OxmlElement("w:r")
    text_node = OxmlElement("w:t")
    text_node.text = "john@test.com"
    run_node.append(text_node)
    link_elem.append(run_node)
    cell_paragraph._p.append(link_elem)
    # Register the external relationship that the r:id above points at.
    document.part.rels.add_relationship(
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
        "mailto:john@test.com",
        rel_id,
        is_external=True,
    )
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as handle:
        document.save(handle.name)
        saved_path = handle.name
    try:
        reopened_cell = Document(saved_path).tables[0].cell(0, 0)
        extractor = object.__new__(WordExtractor)
        result = extractor._parse_cell_paragraph(reopened_cell.paragraphs[0], {})
        assert result == "[john@test.com](mailto:john@test.com)"
    finally:
        if os.path.exists(saved_path):
            os.remove(saved_path)

View File

@ -35,7 +35,7 @@ COPY --from=packages /app/web/ .
COPY . .
ENV NODE_OPTIONS="--max-old-space-size=4096"
RUN pnpm build:docker
RUN pnpm build
# production stage

View File

@ -20,17 +20,21 @@ const OnBlurBlock: FC<OnBlurBlockProps> = ({
}) => {
const [editor] = useLexicalComposerContext()
const ref = useRef<any>(null)
const ref = useRef<ReturnType<typeof setTimeout> | null>(null)
useEffect(() => {
return mergeRegister(
const clearHideMenuTimeout = () => {
if (ref.current) {
clearTimeout(ref.current)
ref.current = null
}
}
const unregister = mergeRegister(
editor.registerCommand(
CLEAR_HIDE_MENU_TIMEOUT,
() => {
if (ref.current) {
clearTimeout(ref.current)
ref.current = null
}
clearHideMenuTimeout()
return true
},
COMMAND_PRIORITY_EDITOR,
@ -41,6 +45,7 @@ const OnBlurBlock: FC<OnBlurBlockProps> = ({
// Check if the clicked target element is var-search-input
const target = event?.relatedTarget as HTMLElement
if (!target?.classList?.contains('var-search-input')) {
clearHideMenuTimeout()
ref.current = setTimeout(() => {
editor.dispatchCommand(KEY_ESCAPE_COMMAND, new KeyboardEvent('keydown', { key: 'Escape' }))
}, 200)
@ -61,6 +66,11 @@ const OnBlurBlock: FC<OnBlurBlockProps> = ({
COMMAND_PRIORITY_EDITOR,
),
)
return () => {
clearHideMenuTimeout()
unregister()
}
}, [editor, onBlur, onFocus])
return null

View File

@ -3565,11 +3565,6 @@
"count": 2
}
},
"app/components/base/prompt-editor/plugins/on-blur-or-focus-block.tsx": {
"ts/no-explicit-any": {
"count": 1
}
},
"app/components/base/prompt-editor/plugins/query-block/index.tsx": {
"react-refresh/only-export-components": {
"count": 2
@ -12614,14 +12609,6 @@
"count": 2
}
},
"scripts/optimize-standalone.js": {
"e18e/prefer-static-regex": {
"count": 1
},
"unused-imports/no-unused-vars": {
"count": 2
}
},
"scripts/refactor-component.js": {
"e18e/prefer-static-regex": {
"count": 14

View File

@ -218,3 +218,6 @@ export default antfu(
},
},
)
.disableRulesFix([
'e18e/prefer-array-at',
])

View File

@ -26,39 +26,36 @@
"node": "^22.22.1"
},
"scripts": {
"analyze": "next experimental-analyze",
"analyze-component": "node ./scripts/analyze-component.js",
"build": "next build",
"build:vinext": "vinext build",
"dev": "next dev",
"dev:inspect": "next dev --inspect",
"dev:vinext": "vinext dev",
"build": "next build",
"build:docker": "next build && node scripts/optimize-standalone.js",
"build:vinext": "vinext build",
"start": "node ./scripts/copy-and-start.mjs",
"start:vinext": "vinext start",
"gen-doc-paths": "tsx ./scripts/gen-doc-paths.ts",
"gen-icons": "node ./scripts/gen-icons.mjs && eslint --fix app/components/base/icons/src/",
"i18n:check": "tsx ./scripts/check-i18n.js",
"knip": "knip",
"lint": "eslint --cache --concurrency=auto",
"lint:ci": "eslint --cache --concurrency 2",
"lint:fix": "pnpm lint --fix",
"lint:quiet": "pnpm lint --quiet",
"lint:complexity": "pnpm lint --rule 'complexity: [error, {max: 15}]' --quiet",
"lint:report": "pnpm lint --output-file eslint_report.json --format json",
"lint:tss": "tsslint --project tsconfig.json",
"type-check": "tsc --noEmit",
"type-check:tsgo": "tsgo --noEmit",
"preinstall": "npx only-allow pnpm",
"prepare": "cd ../ && node -e \"if (process.env.NODE_ENV !== 'production'){process.exit(1)} \" || husky ./web/.husky",
"gen-icons": "node ./scripts/gen-icons.mjs && eslint --fix app/components/base/icons/src/",
"gen-doc-paths": "tsx ./scripts/gen-doc-paths.ts",
"uglify-embed": "node ./bin/uglify-embed",
"i18n:check": "tsx ./scripts/check-i18n.js",
"test": "vitest run",
"test:coverage": "vitest run --coverage",
"test:ci": "vitest run --coverage --silent=passed-only",
"test:watch": "vitest --watch",
"analyze-component": "node ./scripts/analyze-component.js",
"refactor-component": "node ./scripts/refactor-component.js",
"start": "node ./scripts/copy-and-start.mjs",
"start:vinext": "vinext start",
"storybook": "storybook dev -p 6006",
"storybook:build": "storybook build",
"preinstall": "npx only-allow pnpm",
"analyze": "next experimental-analyze",
"knip": "knip"
"test": "vitest run",
"test:ci": "vitest run --coverage --silent=passed-only",
"test:coverage": "vitest run --coverage",
"test:watch": "vitest --watch",
"type-check": "tsc --noEmit",
"type-check:tsgo": "tsgo --noEmit",
"uglify-embed": "node ./bin/uglify-embed"
},
"dependencies": {
"@amplitude/analytics-browser": "2.36.3",

View File

@ -1,38 +0,0 @@
# Production Build Optimization Scripts
## optimize-standalone.js
This script removes unnecessary development dependencies from the Next.js standalone build output to reduce the production Docker image size.
### What it does
The script specifically targets and removes `jest-worker` packages that are bundled with Next.js but not needed in production. These packages are included because:
1. Next.js includes jest-worker in its compiled dependencies
2. terser-webpack-plugin (used by Next.js for minification) depends on jest-worker
3. pnpm's dependency resolution creates symlinks to jest-worker in various locations
### Usage
The script is automatically run during Docker builds via the `build:docker` npm script:
```bash
# Docker build (removes jest-worker after build)
pnpm build:docker
```
To run the optimization manually:
```bash
node scripts/optimize-standalone.js
```
### What gets removed
- `node_modules/.pnpm/next@*/node_modules/next/dist/compiled/jest-worker`
- `node_modules/.pnpm/terser-webpack-plugin@*/node_modules/jest-worker` (symlinks)
- `node_modules/.pnpm/jest-worker@*` (actual packages)
### Impact
Removing jest-worker saves approximately 36KB per instance from the production image. While this may seem small, it helps ensure production images only contain necessary runtime dependencies.

View File

@ -1,163 +0,0 @@
/**
* Script to optimize Next.js standalone output for production
* Removes unnecessary files like jest-worker that are bundled with Next.js
*/
import fs from 'node:fs'
import path from 'node:path'
import { fileURLToPath } from 'node:url'
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
console.log('🔧 Optimizing standalone output...')
const standaloneDir = path.join(__dirname, '..', '.next', 'standalone')
// Check if standalone directory exists
if (!fs.existsSync(standaloneDir)) {
console.error('❌ Standalone directory not found. Please run "next build" first.')
process.exit(1)
}
// List of paths to remove (relative to standalone directory)
const pathsToRemove = [
// Remove jest-worker from Next.js compiled dependencies
'node_modules/.pnpm/next@*/node_modules/next/dist/compiled/jest-worker',
// Remove jest-worker symlinks from terser-webpack-plugin
'node_modules/.pnpm/terser-webpack-plugin@*/node_modules/jest-worker',
// Remove actual jest-worker packages (directories only, not symlinks)
'node_modules/.pnpm/jest-worker@*',
]
// Function to safely remove a path
/**
 * Safely remove `relativePath` from under `basePath`.
 *
 * `relativePath` may contain `*` wildcard segments (e.g. `next@*`), which are
 * expanded against the actual directory entries on disk. Symlinks are removed
 * with `unlinkSync` so their targets are never touched; real files/directories
 * are removed recursively. Missing paths are ignored (ENOENT is swallowed).
 *
 * NOTE(review): only the FIRST wildcard segment is expanded — the function
 * `return`s after processing it, so any `*` appearing later in the remaining
 * path would be treated literally. All patterns in `pathsToRemove` contain at
 * most one wildcard, so this is fine for current usage.
 */
function removePath(basePath, relativePath) {
  const fullPath = path.join(basePath, relativePath)
  // Handle wildcard patterns
  if (relativePath.includes('*')) {
    const parts = relativePath.split('/')
    let currentPath = basePath
    // Walk segment by segment until the wildcard segment is reached.
    for (let i = 0; i < parts.length; i++) {
      const part = parts[i]
      if (part.includes('*')) {
        // Find matching directories
        if (fs.existsSync(currentPath)) {
          const entries = fs.readdirSync(currentPath)
          // replace '*' with '.*'
          const regexPattern = part.replace(/\*/g, '.*')
          const regex = new RegExp(`^${regexPattern}$`)
          for (const entry of entries) {
            if (regex.test(entry)) {
              // Re-attach the (literal) tail of the pattern to each match.
              const remainingPath = parts.slice(i + 1).join('/')
              const matchedPath = path.join(currentPath, entry, remainingPath)
              try {
                // Use lstatSync to check if path exists (works for both files and symlinks)
                const stats = fs.lstatSync(matchedPath)
                if (stats.isSymbolicLink()) {
                  // Remove symlink
                  fs.unlinkSync(matchedPath)
                  console.log(`✅ Removed symlink: ${path.relative(basePath, matchedPath)}`)
                }
                else {
                  // Remove directory/file
                  fs.rmSync(matchedPath, { recursive: true, force: true })
                  console.log(`✅ Removed: ${path.relative(basePath, matchedPath)}`)
                }
              }
              catch (error) {
                // Silently ignore ENOENT (path not found) errors
                if (error.code !== 'ENOENT') {
                  console.error(`❌ Failed to remove ${matchedPath}: ${error.message}`)
                }
              }
            }
          }
        }
        return
      }
      else {
        currentPath = path.join(currentPath, part)
      }
    }
  }
  else {
    // Direct path removal
    if (fs.existsSync(fullPath)) {
      try {
        fs.rmSync(fullPath, { recursive: true, force: true })
        console.log(`✅ Removed: ${relativePath}`)
      }
      catch (error) {
        console.error(`❌ Failed to remove ${fullPath}: ${error.message}`)
      }
    }
  }
}
// Remove unnecessary paths
console.log('🗑️ Removing unnecessary files...')
for (const pathToRemove of pathsToRemove) {
removePath(standaloneDir, pathToRemove)
}
// Calculate size reduction
console.log('\n📊 Optimization complete!')
// Optional: Display the size of remaining jest-related files (if any)
/**
 * Walk `dir` and collect (standalone-relative) paths of any remaining files
 * whose name contains "jest" — a sanity check after the removal pass above.
 *
 * Symlinked directories are not descended into (lstat + isSymbolicLink check)
 * and nested `node_modules` trees below the standalone root are skipped to
 * keep traversal shallow. Unreadable entries/directories are silently skipped.
 *
 * Fix: drop the unused `err` catch bindings (optional catch binding, ES2019) —
 * they were flagged by the project's `unused-imports/no-unused-vars` lint rule.
 */
const checkForJest = (dir) => {
  const jestFiles = []
  function walk(currentPath) {
    if (!fs.existsSync(currentPath))
      return
    try {
      const entries = fs.readdirSync(currentPath)
      for (const entry of entries) {
        const fullPath = path.join(currentPath, entry)
        try {
          const stat = fs.lstatSync(fullPath) // Use lstatSync to handle symlinks
          if (stat.isDirectory() && !stat.isSymbolicLink()) {
            // Skip node_modules subdirectories to avoid deep traversal
            if (entry === 'node_modules' && currentPath !== standaloneDir) {
              continue
            }
            walk(fullPath)
          }
          else if (stat.isFile() && entry.includes('jest')) {
            jestFiles.push(path.relative(standaloneDir, fullPath))
          }
        }
        catch {
          // Skip files that can't be accessed
          continue
        }
      }
    }
    catch {
      // Skip directories that can't be read
    }
  }
  walk(dir)
  return jestFiles
}
const remainingJestFiles = checkForJest(standaloneDir)
if (remainingJestFiles.length > 0) {
console.log('\n⚠ Warning: Some jest-related files still remain:')
remainingJestFiles.forEach(file => console.log(` - ${file}`))
}
else {
console.log('\n✨ No jest-related files found in standalone output!')
}