feat(web): metric selector

This commit is contained in:
JzoNg
2026-03-30 15:39:52 +08:00
parent 23d0d6a65d
commit b92b8becd1
15 changed files with 786 additions and 236 deletions

View File

@ -3,6 +3,9 @@ import Evaluation from '..'
import { getEvaluationMockConfig } from '../mock'
import { useEvaluationStore } from '../store'
// Mock functions are created through vi.hoisted so they can be referenced
// from the vi.mock factory for '@/service/use-evaluation' in this file.
const mockUseAvailableEvaluationMetrics = vi.hoisted(() => vi.fn())
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
useModelList: () => ({
data: [{
@ -20,12 +23,39 @@ vi.mock('@/app/components/header/account-setting/model-provider-page/model-selec
),
}))
// Route both evaluation service hooks through the hoisted mock functions,
// forwarding every argument, so each test can set their return values.
vi.mock('@/service/use-evaluation', () => ({
useAvailableEvaluationMetrics: (...args: unknown[]) => mockUseAvailableEvaluationMetrics(...args),
useEvaluationNodeInfoMutation: (...args: unknown[]) => mockUseEvaluationNodeInfoMutation(...args),
}))
describe('Evaluation', () => {
beforeEach(() => {
  type NodeInfoResponse = Record<string, Array<{ node_id: string, title: string, type: string }>>
  type MutateOptions = { onSuccess?: (data: NodeInfoResponse) => void }

  // Start every test from an empty store and clean mock call history.
  useEvaluationStore.setState({ resources: {} })
  vi.clearAllMocks()

  // Default: two metrics are available and already loaded.
  const defaultAvailableMetrics = {
    data: {
      metrics: ['answer-correctness', 'faithfulness'],
    },
    isLoading: false,
  }
  mockUseAvailableEvaluationMetrics.mockReturnValue(defaultAvailableMetrics)

  // Default: the node-info mutation succeeds synchronously with one node per metric.
  const resolveWithDefaultNodes = (_input: unknown, options?: MutateOptions) => {
    const nodesByMetric: NodeInfoResponse = {
      'answer-correctness': [
        { node_id: 'node-answer', title: 'Answer Node', type: 'llm' },
      ],
      'faithfulness': [
        { node_id: 'node-faithfulness', title: 'Retriever Node', type: 'retriever' },
      ],
    }
    options?.onSuccess?.(nodesByMetric)
  }
  mockUseEvaluationNodeInfoMutation.mockReturnValue({
    isPending: false,
    mutate: resolveWithDefaultNodes,
  })
})
it('should search, add metrics, and create a batch history record', async () => {
it('should search, select metric nodes, and create a batch history record', async () => {
vi.useFakeTimers()
render(<Evaluation resourceType="workflow" resourceId="app-1" />)
@ -33,32 +63,27 @@ describe('Evaluation', () => {
expect(screen.getByTestId('evaluation-model-selector')).toHaveTextContent('openai:gpt-4o-mini')
fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))
expect(screen.getByTestId('evaluation-metric-loading')).toBeInTheDocument()
await act(async () => {
vi.advanceTimersByTime(200)
})
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), {
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), {
target: { value: 'does-not-exist' },
})
await act(async () => {
vi.advanceTimersByTime(200)
})
expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument()
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchPlaceholder'), {
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), {
target: { value: 'faith' },
})
await act(async () => {
vi.advanceTimersByTime(200)
fireEvent.click(screen.getByTestId('evaluation-metric-node-faithfulness-node-faithfulness'))
expect(screen.getAllByText('Faithfulness').length).toBeGreaterThan(0)
expect(screen.getAllByText('Retriever Node').length).toBeGreaterThan(0)
fireEvent.change(screen.getByPlaceholderText('evaluation.metrics.searchNodeOrMetrics'), {
target: { value: '' },
})
fireEvent.click(screen.getByRole('button', { name: /Faithfulness/i }))
expect(screen.getAllByText('Faithfulness').length).toBeGreaterThan(0)
fireEvent.click(screen.getByTestId('evaluation-metric-node-answer-correctness-node-answer'))
expect(screen.getAllByText('Answer Correctness').length).toBeGreaterThan(0)
fireEvent.click(screen.getByRole('button', { name: 'evaluation.batch.run' }))
expect(screen.getByText('evaluation.batch.status.running')).toBeInTheDocument()
@ -109,4 +134,78 @@ describe('Evaluation', () => {
expect(screen.queryByText('evaluation.conditions.selectTime')).not.toBeInTheDocument()
})
it('should render the metric no-node empty state', () => {
  type NodeInfoResponse = Record<string, Array<{ node_id: string, title: string, type: string }>>
  type MutateOptions = { onSuccess?: (data: NodeInfoResponse) => void }

  // One metric is available, but the node-info lookup reports no nodes for it.
  const availableMetrics = {
    data: {
      metrics: ['context-precision'],
    },
    isLoading: false,
  }
  const resolveWithoutNodes = (_input: unknown, options?: MutateOptions) => {
    options?.onSuccess?.({
      'context-precision': [],
    })
  }
  mockUseAvailableEvaluationMetrics.mockReturnValue(availableMetrics)
  mockUseEvaluationNodeInfoMutation.mockReturnValue({
    isPending: false,
    mutate: resolveWithoutNodes,
  })

  render(<Evaluation resourceType="workflow" resourceId="app-3" />)
  const addMetricButton = screen.getByRole('button', { name: 'evaluation.metrics.add' })
  fireEvent.click(addMetricButton)

  expect(screen.getByText('evaluation.metrics.noNodesInWorkflow')).toBeInTheDocument()
})
it('should render the global empty state when no metrics are available', () => {
  // The available-metrics hook resolves with an empty list.
  const noAvailableMetrics = {
    data: {
      metrics: [],
    },
    isLoading: false,
  }
  mockUseAvailableEvaluationMetrics.mockReturnValue(noAvailableMetrics)

  render(<Evaluation resourceType="workflow" resourceId="app-4" />)
  const addMetricButton = screen.getByRole('button', { name: 'evaluation.metrics.add' })
  fireEvent.click(addMetricButton)

  expect(screen.getByText('evaluation.metrics.noResults')).toBeInTheDocument()
})
it('should show more nodes when a metric has more than three nodes', () => {
  type NodeInfoResponse = Record<string, Array<{ node_id: string, title: string, type: string }>>
  type MutateOptions = { onSuccess?: (data: NodeInfoResponse) => void }

  // A single metric that resolves to four LLM nodes (node-1 … node-4).
  const fourLlmNodes = [1, 2, 3, 4].map(order => ({
    node_id: `node-${order}`,
    title: `LLM ${order}`,
    type: 'llm',
  }))
  mockUseAvailableEvaluationMetrics.mockReturnValue({
    data: {
      metrics: ['answer-correctness'],
    },
    isLoading: false,
  })
  mockUseEvaluationNodeInfoMutation.mockReturnValue({
    isPending: false,
    mutate: (_input: unknown, options?: MutateOptions) => {
      options?.onSuccess?.({
        'answer-correctness': fourLlmNodes,
      })
    },
  })

  render(<Evaluation resourceType="workflow" resourceId="app-5" />)
  fireEvent.click(screen.getByRole('button', { name: 'evaluation.metrics.add' }))

  // Only the first three nodes are listed until "show more" is pressed.
  expect(screen.getByText('LLM 3')).toBeInTheDocument()
  expect(screen.queryByText('LLM 4')).not.toBeInTheDocument()

  const showMoreButton = screen.getByRole('button', { name: 'evaluation.metrics.showMore' })
  fireEvent.click(showMoreButton)

  expect(screen.getByText('LLM 4')).toBeInTheDocument()
  expect(screen.getByRole('button', { name: 'evaluation.metrics.showLess' })).toBeInTheDocument()
})
})

View File

@ -51,6 +51,30 @@ describe('evaluation store', () => {
expect(useEvaluationStore.getState().resources['workflow:app-2'].metrics.some(metric => metric.id === addedMetric!.id)).toBe(false)
})
it('should upsert builtin metric node selections', () => {
  const resourceType = 'workflow'
  const resourceId = 'app-4'
  const store = useEvaluationStore.getState()
  const metricId = getEvaluationMockConfig(resourceType).builtinMetrics[0].id

  // Add the same builtin metric twice, each time with a different node selection.
  store.ensureResource(resourceType, resourceId)
  store.addBuiltinMetric(resourceType, resourceId, metricId, [
    { node_id: 'node-1', title: 'Answer Node', type: 'answer' },
  ])
  store.addBuiltinMetric(resourceType, resourceId, metricId, [
    { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
  ])

  // The second call must replace the selection instead of adding a second entry.
  const matchingMetrics = useEvaluationStore
    .getState()
    .resources['workflow:app-4']
    .metrics
    .filter(item => item.optionId === metricId)
  const [metric] = matchingMetrics

  expect(metric?.nodeInfoList).toEqual([
    { node_id: 'node-2', title: 'Retriever Node', type: 'retriever' },
  ])
  expect(matchingMetrics).toHaveLength(1)
})
it('should update condition groups and adapt operators to field types', () => {
const resourceType = 'pipeline'
const resourceId = 'dataset-1'

View File

@ -36,7 +36,7 @@ const MetricSection = ({
</div>
)}
{resource.metrics.map(metric => (
<div key={metric.id} className="rounded-xl border border-divider-subtle bg-components-card-bg p-4">
<div key={metric.id} className="rounded-2xl border border-divider-subtle bg-components-card-bg p-4">
<div className="flex items-start justify-between gap-3">
<div>
<div className="text-text-primary system-sm-semibold">{metric.label}</div>
@ -46,6 +46,22 @@ const MetricSection = ({
<Badge key={badge} className={badge === 'Workflow' ? 'badge-accent' : ''}>{badge}</Badge>
))}
</div>
{metric.kind === 'builtin' && (
<div className="mt-3 rounded-xl bg-background-default-subtle px-3 py-2">
<div className="text-text-secondary system-2xs-medium-uppercase">{t('metrics.nodesLabel')}</div>
<div className="mt-2 flex flex-wrap gap-2">
{metric.nodeInfoList?.length
? metric.nodeInfoList.map(nodeInfo => (
<Badge key={nodeInfo.node_id} className="badge-accent">
{nodeInfo.title}
</Badge>
))
: (
<span className="text-text-tertiary system-xs-regular">{t('metrics.nodesAll')}</span>
)}
</div>
</div>
)}
</div>
<Button
size="small"
@ -68,7 +84,6 @@ const MetricSection = ({
<MetricSelector
resourceType={resourceType}
resourceId={resourceId}
triggerStyle="text"
/>
</div>
</section>

View File

@ -1,203 +0,0 @@
'use client'
import type { ChangeEvent } from 'react'
import type { EvaluationResourceProps } from '../types'
import { useEffect, useMemo, useRef, useState } from 'react'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import {
Popover,
PopoverContent,
PopoverTrigger,
} from '@/app/components/base/ui/popover'
import { cn } from '@/utils/classnames'
import { getEvaluationMockConfig } from '../mock'
import { useEvaluationResource, useEvaluationStore } from '../store'
/**
 * Props for the metric selector popover.
 *
 * Extends the shared resource props with options that only affect the trigger:
 * - `triggerVariant`: variant forwarded to `Button` when the trigger is a button.
 * - `triggerClassName`: extra class names merged onto the trigger element.
 * - `triggerStyle`: `'text'` renders a plain text-styled `<button>`; otherwise a `Button` is rendered.
 */
type MetricSelectorProps = EvaluationResourceProps & {
triggerVariant?: 'primary' | 'warning' | 'secondary' | 'secondary-accent' | 'ghost' | 'ghost-accent' | 'tertiary'
triggerClassName?: string
triggerStyle?: 'button' | 'text'
}
/**
 * Popover for adding evaluation metrics to a resource.
 *
 * Lists the builtin metrics from the mock config, grouped by `metric.group` and
 * filtered by the search query (matched against label and description), and
 * offers a footer button that adds a custom metric. Choosing any entry adds the
 * metric to the store and closes the popover.
 */
const MetricSelector = ({
resourceType,
resourceId,
triggerVariant = 'secondary',
triggerClassName,
triggerStyle = 'button',
}: MetricSelectorProps) => {
const { t } = useTranslation('evaluation')
const config = getEvaluationMockConfig(resourceType)
// Translated headings for known groups; unknown group names are rendered as-is below.
const metricGroupLabels = {
quality: t('metrics.groups.quality'),
operations: t('metrics.groups.operations'),
}
const metrics = useEvaluationResource(resourceType, resourceId).metrics
const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric)
const addCustomMetric = useEvaluationStore(state => state.addCustomMetric)
const [open, setOpen] = useState(false)
const [query, setQuery] = useState('')
const [showAll, setShowAll] = useState(false)
const [isLoading, setIsLoading] = useState(false)
// Handle of the pending "stop loading" timeout, if any.
const loadingTimerRef = useRef<number | null>(null)
// Shows the loading skeleton for 180ms. No request is issued in this component;
// the delay is purely timer-driven. Restarting cancels any timer still pending.
const triggerLoading = () => {
if (loadingTimerRef.current)
window.clearTimeout(loadingTimerRef.current)
setIsLoading(true)
loadingTimerRef.current = window.setTimeout(() => {
setIsLoading(false)
}, 180)
}
// Opening starts the loading phase; closing cancels it immediately.
const handleOpenChange = (nextOpen: boolean) => {
setOpen(nextOpen)
if (nextOpen) {
triggerLoading()
return
}
if (loadingTimerRef.current)
window.clearTimeout(loadingTimerRef.current)
setIsLoading(false)
}
// Every query change while the popover is open re-runs the loading phase.
const handleQueryChange = (event: ChangeEvent<HTMLInputElement>) => {
setQuery(event.target.value)
if (open)
triggerLoading()
}
// Clear a pending timer on unmount so it cannot set state afterwards.
useEffect(() => {
return () => {
if (loadingTimerRef.current)
window.clearTimeout(loadingTimerRef.current)
}
}, [])
// [groupName, metrics[]] pairs for the metrics matching the query, in first-seen group order.
const filteredGroups = useMemo(() => {
const filteredMetrics = config.builtinMetrics.filter((metric) => {
const keyword = query.trim().toLowerCase()
if (!keyword)
return true
return metric.label.toLowerCase().includes(keyword) || metric.description.toLowerCase().includes(keyword)
})
const grouped = filteredMetrics.reduce<Record<string, typeof filteredMetrics>>((acc, metric) => {
acc[metric.group] = [...(acc[metric.group] ?? []), metric]
return acc
}, {})
return Object.entries(grouped)
}, [config.builtinMetrics, query])
// Render notes: each group shows only its first two options unless `showAll` is set,
// and the single show more/less toggle applies to every group at once.
return (
<Popover open={open} onOpenChange={handleOpenChange}>
<PopoverTrigger
render={(
triggerStyle === 'text'
? (
<button type="button" className={cn('inline-flex items-center text-text-accent system-sm-medium', triggerClassName)}>
<span aria-hidden="true" className="i-ri-add-line mr-1 h-4 w-4" />
{t('metrics.add')}
</button>
)
: (
<Button variant={triggerVariant} className={triggerClassName}>
<span aria-hidden="true" className="i-ri-add-line mr-1 h-4 w-4" />
{t('metrics.add')}
</Button>
)
)}
/>
<PopoverContent popupClassName="w-[360px] p-3">
<div className="space-y-3">
<Input
value={query}
showLeftIcon
placeholder={t('metrics.searchPlaceholder')}
onChange={handleQueryChange}
/>
<div className="max-h-[320px] space-y-3 overflow-y-auto pr-1">
{isLoading && (
<div className="space-y-2" data-testid="evaluation-metric-loading">
{['metric-skeleton-1', 'metric-skeleton-2', 'metric-skeleton-3'].map(key => (
<div key={key} className="h-14 animate-pulse rounded-xl bg-background-default-subtle" />
))}
</div>
)}
{!isLoading && filteredGroups.length === 0 && (
<div className="rounded-xl border border-dashed border-divider-subtle px-4 py-8 text-center text-text-tertiary system-sm-regular">
{t('metrics.noResults')}
</div>
)}
{!isLoading && filteredGroups.map(([groupName, options]) => {
const shownOptions = showAll ? options : options.slice(0, 2)
return (
<div key={groupName}>
<div className="mb-2 text-text-tertiary system-xs-medium-uppercase">{metricGroupLabels[groupName as keyof typeof metricGroupLabels] ?? groupName}</div>
<div className="space-y-2">
{shownOptions.map(option => (
<button
key={option.id}
type="button"
className="w-full rounded-xl border border-divider-subtle px-3 py-3 text-left hover:border-components-button-secondary-border hover:bg-state-base-hover-alt"
onClick={() => {
addBuiltinMetric(resourceType, resourceId, option.id)
setOpen(false)
}}
>
<div className="flex items-start justify-between gap-3">
<div>
<div className="text-text-primary system-sm-semibold">{option.label}</div>
<div className="mt-1 text-text-tertiary system-xs-regular">{option.description}</div>
</div>
{metrics.some(metric => metric.optionId === option.id && metric.kind === 'builtin') && (
<Badge className="badge-accent">{t('metrics.added')}</Badge>
)}
</div>
</button>
))}
</div>
</div>
)
})}
</div>
{filteredGroups.some(([, options]) => options.length > 2) && (
<button
type="button"
className="flex items-center text-text-accent system-sm-medium"
onClick={() => setShowAll(value => !value)}
>
{showAll ? t('metrics.showLess') : t('metrics.showMore')}
<span
aria-hidden="true"
className={cn('i-ri-arrow-down-s-line ml-1 h-4 w-4 transition-transform', showAll && 'rotate-180')}
/>
</button>
)}
<div className="border-t border-divider-subtle pt-3">
<Button
className="w-full justify-center"
variant="ghost-accent"
onClick={() => {
addCustomMetric(resourceType, resourceId)
setOpen(false)
}}
>
{t('metrics.addCustom')}
</Button>
</div>
</div>
</PopoverContent>
</Popover>
)
}
export default MetricSelector

View File

@ -0,0 +1,149 @@
'use client'
import type { ChangeEvent } from 'react'
import type { MetricSelectorProps } from './types'
import { useState } from 'react'
import { useTranslation } from 'react-i18next'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import {
Popover,
PopoverContent,
PopoverTrigger,
} from '@/app/components/base/ui/popover'
import { cn } from '@/utils/classnames'
import { useEvaluationStore } from '../../store'
import SelectorEmptyState from './selector-empty-state'
import SelectorFooter from './selector-footer'
import SelectorMetricSection from './selector-metric-section'
import { useMetricSelectorData } from './use-metric-selector-data'
/**
 * Popover that lets the user pick builtin evaluation metrics (per workflow node)
 * or add a custom metric.
 *
 * The component owns only UI state: open flag, search query, the node-info map
 * and the per-metric collapsed / "show all nodes" flags. Loading of metrics and
 * node info, filtering, and selection toggling come from `useMetricSelectorData`.
 */
const MetricSelector = ({
  resourceType,
  resourceId,
  triggerVariant = 'ghost-accent',
  triggerClassName,
  triggerStyle = 'button',
}: MetricSelectorProps) => {
  const { t } = useTranslation('evaluation')
  const addCustomMetric = useEvaluationStore(state => state.addCustomMetric)
  const [open, setOpen] = useState(false)
  const [query, setQuery] = useState('')
  const [nodeInfoMap, setNodeInfoMap] = useState<Record<string, Array<{ node_id: string, title: string, type: string }>>>({})
  const [collapsedMetricMap, setCollapsedMetricMap] = useState<Record<string, boolean>>({})
  const [expandedMetricNodesMap, setExpandedMetricNodesMap] = useState<Record<string, boolean>>({})

  const selectorData = useMetricSelectorData({
    open,
    query,
    resourceType,
    resourceId,
    nodeInfoMap,
    setNodeInfoMap,
  })
  const { builtinMetricMap, filteredSections, isRemoteLoading, toggleNodeSelection } = selectorData

  // Each time the popover opens, start from an empty query and default section state.
  const handleOpenChange = (nextOpen: boolean) => {
    setOpen(nextOpen)
    if (!nextOpen)
      return

    setQuery('')
    setCollapsedMetricMap({})
    setExpandedMetricNodesMap({})
  }

  const handleQueryChange = (event: ChangeEvent<HTMLInputElement>) => {
    setQuery(event.target.value)
  }

  const handleAddCustomMetric = () => {
    addCustomMetric(resourceType, resourceId)
    setOpen(false)
  }

  // The trigger is either a plain text-styled button or the shared Button component.
  const triggerElement = triggerStyle === 'text'
    ? (
        <button type="button" className={cn('inline-flex items-center text-text-accent system-sm-medium', triggerClassName)}>
          <span aria-hidden="true" className="i-ri-add-line mr-1 h-4 w-4" />
          {t('metrics.add')}
        </button>
      )
    : (
        <Button variant={triggerVariant} className={triggerClassName}>
          <span aria-hidden="true" className="i-ri-add-line mr-1 h-4 w-4" />
          {t('metrics.add')}
        </Button>
      )

  const renderSections = () => filteredSections.map((section, index) => {
    const metricId = section.metric.id
    // Sections are expanded unless explicitly collapsed; node lists are truncated unless explicitly expanded.
    const isExpanded = collapsedMetricMap[metricId] !== true
    const isShowingAllNodes = expandedMetricNodesMap[metricId] === true

    return (
      <SelectorMetricSection
        key={metricId}
        section={section}
        index={index}
        addedMetric={builtinMetricMap.get(metricId)}
        isExpanded={isExpanded}
        isShowingAllNodes={isShowingAllNodes}
        onToggleExpanded={() => setCollapsedMetricMap(current => ({
          ...current,
          [metricId]: isExpanded,
        }))}
        onToggleNodeSelection={toggleNodeSelection}
        onToggleShowAllNodes={() => setExpandedMetricNodesMap(current => ({
          ...current,
          [metricId]: !isShowingAllNodes,
        }))}
        t={t}
      />
    )
  })

  const renderBody = () => {
    if (isRemoteLoading) {
      return (
        <div className="space-y-3 px-3 py-4" data-testid="evaluation-metric-loading">
          {['metric-skeleton-1', 'metric-skeleton-2', 'metric-skeleton-3'].map(key => (
            <div key={key} className="h-20 animate-pulse rounded-xl bg-background-default-subtle" />
          ))}
        </div>
      )
    }

    if (filteredSections.length === 0)
      return <SelectorEmptyState message={t('metrics.noResults')} />

    return renderSections()
  }

  return (
    <Popover open={open} onOpenChange={handleOpenChange}>
      <PopoverTrigger render={triggerElement} />
      <PopoverContent popupClassName="w-[360px] overflow-hidden rounded-xl border-[0.5px] border-components-panel-border p-0 shadow-[0px_12px_16px_-4px_rgba(9,9,11,0.08),0px_4px_6px_-2px_rgba(9,9,11,0.03)]">
        <div className="flex min-h-[560px] flex-col bg-components-panel-bg">
          <div className="border-b border-divider-subtle bg-background-section-burn px-2 py-2">
            <Input
              value={query}
              showLeftIcon
              placeholder={t('metrics.searchNodeOrMetrics')}
              onChange={handleQueryChange}
            />
          </div>
          <div className="min-h-0 flex-1 overflow-y-auto">
            {renderBody()}
          </div>
          <SelectorFooter
            title={t('metrics.custom.footerTitle')}
            description={t('metrics.custom.footerDescription')}
            onClick={handleAddCustomMetric}
          />
        </div>
      </PopoverContent>
    </Popover>
  )
}
export default MetricSelector

View File

@ -0,0 +1,26 @@
/** Props for `SelectorEmptyState`: the text shown beneath the empty-search icon. */
type SelectorEmptyStateProps = {
message: string
}
/**
 * Decorative icon for the empty state: a search glyph with two short bars to its left.
 * Every part is `aria-hidden`, so it adds nothing to the accessibility tree.
 */
const EmptySearchStateIcon = () => (
  <div className="relative h-8 w-8 text-text-quaternary">
    <span aria-hidden="true" className="i-ri-search-line absolute bottom-0 right-0 h-6 w-6" />
    <span aria-hidden="true" className="absolute left-0 top-[9px] h-[2px] w-[7px] rounded-full bg-current opacity-80" />
    <span aria-hidden="true" className="absolute left-0 top-[16px] h-[2px] w-[4px] rounded-full bg-current opacity-80" />
  </div>
)
/**
 * Centered placeholder for the selector body: the empty-search icon above `message`.
 */
const SelectorEmptyState = ({ message }: SelectorEmptyStateProps) => (
  <div className="flex h-full min-h-[524px] flex-col items-center justify-center gap-2 px-4 pb-20 text-center">
    <EmptySearchStateIcon />
    <div className="text-text-secondary system-sm-regular">{message}</div>
  </div>
)
export default SelectorEmptyState

View File

@ -0,0 +1,30 @@
/**
 * Props for `SelectorFooter`.
 * `title` and `description` are the two text lines; `onClick` fires when the footer button is pressed.
 */
type SelectorFooterProps = {
title: string
description: string
onClick: () => void
}
/**
 * Full-width footer button for the selector: a blurred accent blob, a plus-icon
 * tile, and a title/description pair. The whole row is one clickable `<button>`.
 */
const SelectorFooter = ({ title, description, onClick }: SelectorFooterProps) => (
  <button
    type="button"
    className="relative flex items-center gap-3 overflow-hidden border-t border-divider-subtle bg-background-default-subtle px-4 py-5 text-left hover:bg-state-base-hover-alt"
    onClick={onClick}
  >
    <div className="absolute -left-6 -top-6 h-28 w-28 rounded-full bg-util-colors-indigo-indigo-100 opacity-50 blur-2xl" />
    <div className="relative flex h-8 w-8 shrink-0 items-center justify-center rounded-[10px] border-[0.5px] border-components-card-border bg-components-card-bg shadow-[0px_3px_10px_-2px_rgba(9,9,11,0.08),0px_2px_4px_-2px_rgba(9,9,11,0.06)]">
      <span aria-hidden="true" className="i-ri-add-line h-[18px] w-[18px] text-text-tertiary" />
    </div>
    <div className="relative min-w-0">
      <div className="text-text-secondary system-sm-semibold">{title}</div>
      <div className="mt-0.5 text-text-tertiary system-xs-regular">{description}</div>
    </div>
  </button>
)
export default SelectorFooter

View File

@ -0,0 +1,135 @@
import type { TFunction } from 'i18next'
import type { EvaluationMetric } from '../../types'
import type { MetricSelectorSection } from './types'
import { cn } from '@/utils/classnames'
import { getMetricVisual, getNodeVisual, getToneClasses } from './utils'
/**
 * Props for `SelectorMetricSection`.
 *
 * - `section`: the metric plus the nodes to list for it.
 * - `index`: position in the list; a divider is drawn above every section except the first.
 * - `addedMetric`: the already-added metric for this option, if any; drives the "added" markers.
 * - `isExpanded` / `isShowingAllNodes`: display flags owned by the parent.
 * - `onToggle*`: callbacks fired by the header, the show more/less row, and each node row.
 * - `t`: translation function for the `evaluation` namespace.
 */
type SelectorMetricSectionProps = {
section: MetricSelectorSection
index: number
addedMetric?: EvaluationMetric
isExpanded: boolean
isShowingAllNodes: boolean
onToggleExpanded: () => void
onToggleShowAllNodes: () => void
onToggleNodeSelection: (metricId: string, nodeInfo: MetricSelectorSection['visibleNodes'][number]) => void
t: TFunction<'evaluation'>
}
/**
 * One collapsible metric section inside the metric selector.
 *
 * The header toggles the section open or closed. When open, the section lists
 * the metric's nodes (at most three until "show more" is pressed) and each node
 * row toggles that node's selection through `onToggleNodeSelection`.
 *
 * A node is displayed as "added" when the metric is already added and either
 * the node is in the metric's `nodeInfoList`, or that list is empty/absent
 * (in which case every node is marked as added).
 *
 * Accessibility: the header exposes its open state via `aria-expanded`, and the
 * node rows and the show more/less row expose their toggle state via `aria-pressed`.
 */
const SelectorMetricSection = ({
  section,
  index,
  addedMetric,
  isExpanded,
  isShowingAllNodes,
  onToggleExpanded,
  onToggleShowAllNodes,
  onToggleNodeSelection,
  t,
}: SelectorMetricSectionProps) => {
  const { metric, visibleNodes, hasNoNodeInfo } = section
  const selectedNodeIds = new Set(
    (addedMetric?.nodeInfoList ?? []).map(nodeInfo => nodeInfo.node_id),
  )
  // An added metric without an explicit node list marks every node as added.
  const coversAllNodes = !!addedMetric && !addedMetric.nodeInfoList?.length
  const metricVisual = getMetricVisual(metric.id)
  const toneClasses = getToneClasses(metricVisual.tone)
  const hasMoreNodes = visibleNodes.length > 3
  const shownNodes = hasMoreNodes && !isShowingAllNodes ? visibleNodes.slice(0, 3) : visibleNodes

  return (
    <div data-testid={`evaluation-metric-option-${metric.id}`}>
      {index > 0 && (
        <div className="px-3 pt-1">
          <div className="h-px w-full bg-divider-subtle" />
        </div>
      )}
      <div className="flex items-center justify-between px-4 pb-1 pt-3">
        <button
          type="button"
          className="flex min-w-0 items-center gap-2"
          aria-expanded={isExpanded}
          onClick={onToggleExpanded}
        >
          <div className={cn('flex h-[18px] w-[18px] items-center justify-center rounded-md', toneClasses.soft)}>
            <span aria-hidden="true" className={cn(metricVisual.icon, 'h-3.5 w-3.5')} />
          </div>
          <div className="flex items-center gap-1">
            <span className="truncate text-text-secondary system-xs-medium-uppercase">{metric.label}</span>
            <span
              aria-hidden="true"
              className={cn('i-ri-arrow-down-s-line h-4 w-4 text-text-quaternary transition-transform', !isExpanded && '-rotate-90')}
            />
          </div>
        </button>
        {/* NOTE(review): this help button has no handler and no accessible name yet; wire it up or hide it from assistive tech. */}
        <button type="button" className="p-px text-text-quaternary">
          <span aria-hidden="true" className="i-ri-question-line h-[14px] w-[14px]" />
        </button>
      </div>
      {isExpanded && (
        <div className="px-1 py-1">
          {hasNoNodeInfo && (
            <div className="px-3 pb-2 pt-0.5 text-text-tertiary system-sm-regular">
              {t('metrics.noNodesInWorkflow')}
            </div>
          )}
          {shownNodes.map((nodeInfo) => {
            const nodeVisual = getNodeVisual(nodeInfo)
            const nodeToneClasses = getToneClasses(nodeVisual.tone)
            const isAdded = coversAllNodes || selectedNodeIds.has(nodeInfo.node_id)

            return (
              <button
                key={nodeInfo.node_id}
                data-testid={`evaluation-metric-node-${metric.id}-${nodeInfo.node_id}`}
                type="button"
                aria-pressed={isAdded}
                className={cn(
                  'flex w-full items-center gap-1 rounded-md px-2 py-1.5 text-left transition-colors hover:bg-state-base-hover-alt',
                  isAdded && 'opacity-50',
                )}
                onClick={() => onToggleNodeSelection(metric.id, nodeInfo)}
              >
                <div className="flex min-w-0 flex-1 items-center gap-2.5 pr-1">
                  <div className={cn('flex h-[18px] w-[18px] shrink-0 items-center justify-center rounded-md border-[0.45px] border-divider-subtle shadow-xs shadow-shadow-shadow-3', nodeToneClasses.solid)}>
                    <span aria-hidden="true" className={cn(nodeVisual.icon, 'h-3.5 w-3.5')} />
                  </div>
                  <span className="truncate text-[13px] font-medium leading-4 text-text-secondary">
                    {nodeInfo.title}
                  </span>
                </div>
                {isAdded && (
                  <span className="shrink-0 px-1 text-text-quaternary system-xs-regular">{t('metrics.added')}</span>
                )}
              </button>
            )
          })}
          {hasMoreNodes && (
            <button
              type="button"
              aria-pressed={isShowingAllNodes}
              className="flex w-full items-center gap-1 rounded-md px-2 py-1.5 text-left hover:bg-state-base-hover-alt"
              onClick={onToggleShowAllNodes}
            >
              <div className="flex min-w-0 flex-1 items-center gap-1.5 pr-1">
                <div className="flex items-center px-1 text-text-tertiary">
                  <span aria-hidden="true" className={cn(isShowingAllNodes ? 'i-ri-subtract-line' : 'i-ri-more-line', 'h-4 w-4')} />
                </div>
                <span className="truncate text-text-tertiary system-xs-regular">
                  {isShowingAllNodes ? t('metrics.showLess') : t('metrics.showMore')}
                </span>
              </div>
            </button>
          )}
        </div>
      )}
    </div>
  )
}
export default SelectorMetricSection

View File

@ -0,0 +1,18 @@
import type { EvaluationMetric, EvaluationResourceProps, MetricOption } from '../../types'
import type { NodeInfo } from '@/types/evaluation'
/**
 * Props for the metric selector popover.
 *
 * Extends the shared resource props with options that only affect the trigger:
 * - `triggerVariant`: variant forwarded to `Button` when the trigger is a button.
 * - `triggerClassName`: extra class names merged onto the trigger element.
 * - `triggerStyle`: `'text'` renders a plain text-styled `<button>`; otherwise a `Button` is rendered.
 */
export type MetricSelectorProps = EvaluationResourceProps & {
triggerVariant?: 'primary' | 'warning' | 'secondary' | 'secondary-accent' | 'ghost' | 'ghost-accent' | 'tertiary'
triggerClassName?: string
triggerStyle?: 'button' | 'text'
}
/** Color tone of a metric or node icon; `getToneClasses` maps it to CSS class names. */
export type MetricVisualTone = 'indigo' | 'green'
/**
 * One entry of the selector list: a metric and the nodes to show under it.
 *
 * - `hasNoNodeInfo`: true when the resource supports node selection but no nodes
 *   were returned for this metric; the section then shows the "no nodes" message.
 * - `visibleNodes`: the nodes left after applying the search query.
 */
export type MetricSelectorSection = {
metric: MetricOption
hasNoNodeInfo: boolean
visibleNodes: NodeInfo[]
}
/** Already-added builtin metrics; `useMetricSelectorData` keys this map by the metric's `optionId`. */
export type BuiltinMetricMap = Map<string, EvaluationMetric>

View File

@ -0,0 +1,167 @@
import type { BuiltinMetricMap, MetricSelectorSection } from './types'
import type { NodeInfo } from '@/types/evaluation'
import { useEffect, useMemo } from 'react'
import { useAvailableEvaluationMetrics, useEvaluationNodeInfoMutation } from '@/service/use-evaluation'
import { getEvaluationMockConfig } from '../../mock'
import { useEvaluationResource, useEvaluationStore } from '../../store'
import {
buildMetricOption,
dedupeNodeInfoList,
toEvaluationTargetType,
} from './utils'
/**
 * Inputs of `useMetricSelectorData`.
 *
 * - `open`: whether the selector popover is open; loading only happens while it is.
 * - `query`: raw search text (the hook trims and lower-cases it).
 * - `resourceType` / `resourceId`: the resource whose metrics are being edited.
 * - `nodeInfoMap` / `setNodeInfoMap`: caller-owned state mapping a metric id to its nodes.
 */
type UseMetricSelectorDataOptions = {
open: boolean
query: string
resourceType: 'workflow' | 'pipeline' | 'snippet'
resourceId: string
nodeInfoMap: Record<string, NodeInfo[]>
setNodeInfoMap: (value: Record<string, NodeInfo[]>) => void
}
/**
 * Output of `useMetricSelectorData`.
 *
 * - `builtinMetricMap`: already-added builtin metrics keyed by `optionId`.
 * - `filteredSections`: the sections to render for the current query.
 * - `isRemoteLoading`: true while available metrics or node info are loading.
 * - `toggleNodeSelection`: adds or removes one node from a metric's selection.
 */
type UseMetricSelectorDataResult = {
builtinMetricMap: BuiltinMetricMap
filteredSections: MetricSelectorSection[]
isRemoteLoading: boolean
toggleNodeSelection: (metricId: string, nodeInfo: NodeInfo) => void
}
/**
 * Data layer of the metric selector.
 *
 * While the popover is open it reads the available metric ids and, for
 * non-pipeline resources with a resource id, requests the nodes each metric
 * applies to and stores them through `setNodeInfoMap`. From that it derives
 * the sections to render for the current query, and exposes a toggle that
 * adds or removes a node from a builtin metric in the evaluation store.
 */
export const useMetricSelectorData = ({
  open,
  query,
  resourceType,
  resourceId,
  nodeInfoMap,
  setNodeInfoMap,
}: UseMetricSelectorDataOptions): UseMetricSelectorDataResult => {
  const config = getEvaluationMockConfig(resourceType)
  const { metrics } = useEvaluationResource(resourceType, resourceId)
  const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric)
  const removeMetric = useEvaluationStore(state => state.removeMetric)
  const { data: availableMetricsData, isLoading: isAvailableMetricsLoading } = useAvailableEvaluationMetrics(open)
  const { mutate: loadNodeInfo, isPending: isNodeInfoLoading } = useEvaluationNodeInfoMutation()

  // Already-added builtin metrics, keyed by the option they were created from.
  const builtinMetricMap = useMemo(() => {
    const addedBuiltinMetrics = metrics.filter(metric => metric.kind === 'builtin')
    return new Map(addedBuiltinMetrics.map(metric => [metric.optionId, metric] as const))
  }, [metrics])

  const availableMetricIds = useMemo(() => availableMetricsData?.metrics ?? [], [availableMetricsData?.metrics])
  const availableMetricIdsKey = availableMetricIds.join(',')

  // Known ids reuse the configured option; unknown ids get a generated fallback option.
  const resolvedMetrics = useMemo(() => {
    const configuredOptions = config.builtinMetrics
    const optionById = new Map(configuredOptions.map(option => [option.id, option] as const))
    const fallbackGroup = configuredOptions[0]?.group ?? 'other'

    return availableMetricIds.map((metricId) => {
      return optionById.get(metricId) ?? buildMetricOption(metricId, fallbackGroup)
    })
  }, [availableMetricIds, config.builtinMetrics])

  useEffect(() => {
    if (!open || resourceType === 'pipeline' || !resourceId || availableMetricIds.length === 0)
      return

    // Set by the cleanup so a response arriving after re-run/unmount is ignored.
    let isStale = false
    const request = {
      params: {
        targetType: toEvaluationTargetType(resourceType),
        targetId: resourceId,
      },
      body: {
        metrics: availableMetricIds,
      },
    }

    loadNodeInfo(request, {
      onSuccess: (data) => {
        if (!isStale)
          setNodeInfoMap(data)
      },
      onError: () => {
        if (!isStale)
          setNodeInfoMap({})
      },
    })

    return () => {
      isStale = true
    }
  }, [availableMetricIds, availableMetricIdsKey, loadNodeInfo, open, resourceId, resourceType, setNodeInfoMap])

  const filteredSections = useMemo(() => {
    const keyword = query.trim().toLowerCase()
    const supportsNodeSelection = resourceType !== 'pipeline'
    const containsKeyword = (text: string) => text.toLowerCase().includes(keyword)
    const sections: MetricSelectorSection[] = []

    for (const metric of resolvedMetrics) {
      const metricMatches = keyword === ''
        || containsKeyword(metric.label)
        || containsKeyword(metric.description)
      const metricNodes = nodeInfoMap[metric.id] ?? []

      // Metric without any nodes: list it (flagged) only when the metric itself matches.
      if (supportsNodeSelection && metricNodes.length === 0) {
        if (metricMatches)
          sections.push({ metric, hasNoNodeInfo: true, visibleNodes: [] })
        continue
      }

      // A matching metric keeps all of its nodes; otherwise keep only nodes that match.
      const visibleNodes = metricMatches
        ? metricNodes
        : metricNodes.filter(nodeInfo => (
            containsKeyword(nodeInfo.title)
            || containsKeyword(nodeInfo.type)
            || containsKeyword(nodeInfo.node_id)
          ))

      if (metricMatches || visibleNodes.length > 0)
        sections.push({ metric, hasNoNodeInfo: false, visibleNodes })
    }

    return sections
  }, [nodeInfoMap, query, resolvedMetrics, resourceType])

  const toggleNodeSelection = (metricId: string, nodeInfo: NodeInfo) => {
    const addedMetric = builtinMetricMap.get(metricId)
    const selectedNodes = addedMetric?.nodeInfoList ?? []
    const isSameNode = (item: NodeInfo) => item.node_id === nodeInfo.node_id

    let nextSelectedNodes: NodeInfo[]
    if (addedMetric && selectedNodes.length === 0) {
      // Added metric with an empty selection: the clicked node becomes the whole selection.
      nextSelectedNodes = [nodeInfo]
    }
    else if (selectedNodes.some(isSameNode)) {
      nextSelectedNodes = selectedNodes.filter(item => !isSameNode(item))
    }
    else {
      nextSelectedNodes = dedupeNodeInfoList([...selectedNodes, nodeInfo])
    }

    // Deselecting the last node removes the metric altogether.
    if (addedMetric && nextSelectedNodes.length === 0) {
      removeMetric(resourceType, resourceId, addedMetric.id)
      return
    }

    addBuiltinMetric(resourceType, resourceId, metricId, nextSelectedNodes)
  }

  return {
    builtinMetricMap,
    filteredSections,
    isRemoteLoading: isAvailableMetricsLoading || isNodeInfoLoading,
    toggleNodeSelection,
  }
}

View File

@ -0,0 +1,77 @@
import type { MetricOption } from '../../types'
import type { MetricVisualTone } from './types'
import type { EvaluationTargetType, NodeInfo } from '@/types/evaluation'
/**
 * Converts a UI resource type into the evaluation target type.
 *
 * @param resourceType - `'workflow'` or `'snippet'`.
 * @returns `'snippets'` for snippets, `'app'` otherwise.
 */
export const toEvaluationTargetType = (resourceType: 'workflow' | 'snippet'): EvaluationTargetType => {
  if (resourceType === 'snippet')
    return 'snippets'

  return 'app'
}
/**
 * Turns a metric id such as `answer-correctness` into a display label such as
 * `Answer Correctness`.
 *
 * The id is split on `-` and `_`, empty segments are dropped, the first
 * character of each segment is upper-cased, and segments are joined with spaces.
 */
export const humanizeMetricId = (metricId: string) => {
  const words: string[] = []

  for (const segment of metricId.split(/[-_]/g)) {
    if (!segment)
      continue
    words.push(segment.charAt(0).toUpperCase() + segment.slice(1))
  }

  return words.join(' ')
}
/**
 * Builds a fallback option for a metric id that has no configured entry.
 *
 * @param metricId - Id of the metric; also used to derive the label.
 * @param fallbackGroup - Group the generated option is placed in.
 * @returns An option with a humanized label, an empty description and a single `Built-in` badge.
 */
export const buildMetricOption = (metricId: string, fallbackGroup: string): MetricOption => {
  const label = humanizeMetricId(metricId)

  return {
    id: metricId,
    label,
    description: '',
    group: fallbackGroup,
    badges: ['Built-in'],
  }
}
/**
 * Picks the icon class and color tone shown next to a metric.
 *
 * The two context metrics use the green tone; every other metric, including
 * unknown ids, uses indigo. Unknown ids get the checkbox-circle icon.
 */
export const getMetricVisual = (metricId: string): { icon: string, tone: MetricVisualTone } => {
  switch (metricId) {
    case 'context-precision':
      return { icon: 'i-ri-focus-2-line', tone: 'green' }
    case 'context-recall':
      return { icon: 'i-ri-arrow-go-back-line', tone: 'green' }
    case 'faithfulness':
      return { icon: 'i-ri-anchor-line', tone: 'indigo' }
    case 'tool-correctness':
      return { icon: 'i-ri-tools-line', tone: 'indigo' }
    case 'task-completion':
      return { icon: 'i-ri-task-line', tone: 'indigo' }
    case 'argument-correctness':
      return { icon: 'i-ri-scales-3-line', tone: 'indigo' }
    default:
      return { icon: 'i-ri-checkbox-circle-line', tone: 'indigo' }
  }
}
/**
 * Picks the icon class and color tone for a workflow node from its type and title.
 *
 * Matching is case-insensitive and checked in this order:
 * 1. type or title contains `retriev`, or title contains `knowledge` → book icon, green;
 * 2. type or title contains `agent` → user-star icon, indigo;
 * 3. anything else → AI-generate icon, indigo.
 */
export const getNodeVisual = (nodeInfo: NodeInfo): { icon: string, tone: MetricVisualTone } => {
  const type = nodeInfo.type.toLowerCase()
  const title = nodeInfo.title.toLowerCase()
  const typeOrTitleIncludes = (fragment: string) => type.includes(fragment) || title.includes(fragment)

  const looksLikeRetrieval = typeOrTitleIncludes('retriev') || title.includes('knowledge')
  if (looksLikeRetrieval)
    return { icon: 'i-ri-book-open-line', tone: 'green' }

  if (typeOrTitleIncludes('agent'))
    return { icon: 'i-ri-user-star-line', tone: 'indigo' }

  return { icon: 'i-ri-ai-generate-2', tone: 'indigo' }
}
/**
 * Returns the CSS class strings for a visual tone.
 *
 * `soft` is the tinted background with colored text; `solid` is the
 * filled background with white text. Any tone other than `'green'`
 * receives the indigo classes.
 *
 * The class names are kept as complete string literals on purpose.
 * NOTE(review): presumably the CSS tooling scans sources for full class
 * names, so avoid building them by interpolation — confirm.
 *
 * @param tone - Visual tone to resolve.
 * @returns A fresh `{ soft, solid }` object.
 */
export const getToneClasses = (tone: MetricVisualTone) => {
  const isGreen = tone === 'green'

  return isGreen
    ? {
        soft: 'bg-util-colors-green-green-50 text-util-colors-green-green-500',
        solid: 'bg-util-colors-green-green-500 text-white',
      }
    : {
        soft: 'bg-util-colors-indigo-indigo-50 text-util-colors-indigo-indigo-500',
        solid: 'bg-util-colors-indigo-indigo-500 text-white',
      }
}
/**
 * Removes entries with a duplicate `node_id` from a node list.
 *
 * When several entries share a `node_id`, the result keeps the position
 * of the first occurrence but the object of the last occurrence.
 * The input array is not modified.
 *
 * @param nodeInfoList - Nodes that may contain repeated ids.
 * @returns A new array with one entry per distinct `node_id`.
 */
export const dedupeNodeInfoList = (nodeInfoList: NodeInfo[]) => {
  const nodesById = new Map<NodeInfo['node_id'], NodeInfo>()

  // Map#set on an existing key replaces the value without moving the key.
  for (const nodeInfo of nodeInfoList)
    nodesById.set(nodeInfo.node_id, nodeInfo)

  return Array.from(nodesById.values())
}

View File

@ -9,6 +9,7 @@ import type {
JudgmentConditionGroup,
MetricOption,
} from './types'
import type { NodeInfo } from '@/types/evaluation'
import { getComparisonOperators, getDefaultOperator, getEvaluationMockConfig } from './mock'
export type EvaluationStoreResources = Record<string, EvaluationResourceState>
@ -41,13 +42,14 @@ export const getConditionValue = (
return typeof previousValue === 'string' ? previousValue : null
}
export const createBuiltinMetric = (metric: MetricOption): EvaluationMetric => ({
/**
 * Creates a built-in `EvaluationMetric` from a metric option.
 *
 * The metric gets a new id from `createId('metric')` and keeps the option's
 * id as `optionId`. Label, description, and badges are taken from the option;
 * `badges` is the same array instance as on the option, not a copy.
 *
 * @param metric - Option describing the built-in metric.
 * @param nodeInfoList - Nodes attached to the metric; defaults to an empty list.
 * @returns The new metric object with `kind` set to `'builtin'`.
 */
export const createBuiltinMetric = (metric: MetricOption, nodeInfoList: NodeInfo[] = []): EvaluationMetric => ({
id: createId('metric'),
optionId: metric.id,
kind: 'builtin',
label: metric.label,
description: metric.description,
badges: metric.badges,
nodeInfoList,
})
export const createCustomMetricMapping = (): CustomMetricMapping => ({
@ -88,12 +90,9 @@ export const createConditionGroup = (resourceType: EvaluationResourceType): Judg
})
export const buildInitialState = (resourceType: EvaluationResourceType): EvaluationResourceState => {
const config = getEvaluationMockConfig(resourceType)
const defaultMetric = config.builtinMetrics[0]
return {
judgeModelId: null,
metrics: defaultMetric ? [createBuiltinMetric(defaultMetric)] : [],
metrics: [],
conditions: [createConditionGroup(resourceType)],
activeBatchTab: 'input-fields',
uploadedFileName: null,

View File

@ -3,6 +3,7 @@ import type {
EvaluationResourceState,
EvaluationResourceType,
} from './types'
import type { NodeInfo } from '@/types/evaluation'
import { create } from 'zustand'
import { getDefaultOperator, getEvaluationMockConfig } from './mock'
import {
@ -16,7 +17,6 @@ import {
createCustomMetricMapping,
getAllowedOperators as getAllowedOperatorsFromUtils,
getConditionValue,
getResourceState,
isCustomMetricConfigured as isCustomMetricConfiguredFromUtils,
isEvaluationRunnable as isEvaluationRunnableFromUtils,
requiresConditionValue as requiresConditionValueFromUtils,
@ -29,7 +29,7 @@ type EvaluationStore = {
resources: Record<string, EvaluationResourceState>
ensureResource: (resourceType: EvaluationResourceType, resourceId: string) => void
setJudgeModel: (resourceType: EvaluationResourceType, resourceId: string, judgeModelId: string) => void
addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string) => void
addBuiltinMetric: (resourceType: EvaluationResourceType, resourceId: string, optionId: string, nodeInfoList?: NodeInfo[]) => void
addCustomMetric: (resourceType: EvaluationResourceType, resourceId: string) => void
removeMetric: (resourceType: EvaluationResourceType, resourceId: string, metricId: string) => void
setCustomMetricWorkflow: (resourceType: EvaluationResourceType, resourceId: string, metricId: string, workflowId: string) => void
@ -85,21 +85,23 @@ export const useEvaluationStore = create<EvaluationStore>((set, get) => ({
})),
}))
},
addBuiltinMetric: (resourceType, resourceId, optionId) => {
addBuiltinMetric: (resourceType, resourceId, optionId, nodeInfoList = []) => {
const option = getEvaluationMockConfig(resourceType).builtinMetrics.find(metric => metric.id === optionId)
if (!option)
return
set((state) => {
const { resource } = getResourceState(state.resources, resourceType, resourceId)
if (resource.metrics.some(metric => metric.optionId === optionId && metric.kind === 'builtin'))
return state
return {
resources: updateResourceState(state.resources, resourceType, resourceId, currentResource => ({
...currentResource,
metrics: [...currentResource.metrics, createBuiltinMetric(option)],
metrics: currentResource.metrics.some(metric => metric.optionId === optionId && metric.kind === 'builtin')
? currentResource.metrics.map(metric => metric.optionId === optionId && metric.kind === 'builtin'
? {
...metric,
nodeInfoList,
}
: metric)
: [...currentResource.metrics, createBuiltinMetric(option, nodeInfoList)],
})),
}
})

View File

@ -1,3 +1,5 @@
import type { NodeInfo } from '@/types/evaluation'
export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet'
export type EvaluationResourceProps = {
@ -78,6 +80,7 @@ export type EvaluationMetric = {
label: string
description: string
badges: string[]
nodeInfoList?: NodeInfo[]
customConfig?: CustomMetricConfig
}

View File

@ -54,6 +54,8 @@
"metrics.added": "Added",
"metrics.custom.addMapping": "Add Mapping",
"metrics.custom.description": "Select an evaluation workflow and map your variables before running tests.",
"metrics.custom.footerDescription": "Connect your published evaluation workflows",
"metrics.custom.footerTitle": "Custom metrics",
"metrics.custom.mappingTitle": "Variable Mapping",
"metrics.custom.mappingWarning": "Complete the workflow selection and each variable mapping to enable batch tests.",
"metrics.custom.sourcePlaceholder": "Source variable",
@ -64,12 +66,19 @@
"metrics.custom.workflowPlaceholder": "Select a workflow",
"metrics.description": "Choose from built-in metrics like Groundedness and Correctness to evaluate your workflow outputs.",
"metrics.groups.operations": "Operations",
"metrics.groups.other": "Other",
"metrics.groups.quality": "Quality",
"metrics.noResults": "No metrics match your search.",
"metrics.noNodesInWorkflow": "No LLM nodes in this workflow",
"metrics.noResults": "No metrics or nodes were found",
"metrics.nodesAll": "All nodes",
"metrics.nodesLabel": "Node Scope",
"metrics.nodesSelected": "Selected nodes",
"metrics.remove": "Remove metric",
"metrics.searchNodeOrMetrics": "Search node or metrics",
"metrics.searchPlaceholder": "Search metrics",
"metrics.showLess": "Show less",
"metrics.showMore": "Show more",
"metrics.title": "Metrics",
"metrics.update": "Update",
"title": "Evaluation"
}