feat: optimize exclusion filtering

This commit is contained in:
2026-05-18 02:06:36 +00:00
parent b06a89f2b9
commit d18c4a4a8e
3 changed files with 196 additions and 11 deletions
+149 -8
View File
@@ -46,6 +46,140 @@ function formatFileTime(mtimeMs) {
return new Date(mtimeMs).toISOString();
}
/**
 * Coerce an arbitrary value into a trimmed string.
 *
 * @param {*} value - Anything; only primitive strings are kept.
 * @returns {string} The trimmed string, or '' when value is not a string.
 */
function cleanText(value) {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
/**
 * Build a loose, comparison-friendly form of a value.
 *
 * The value is cleaned with cleanText, NFKC-normalized and lowercased; every
 * run of punctuation, symbols or whitespace then becomes a single space and
 * the result is trimmed.
 *
 * @param {*} value - Value to normalize; non-strings yield ''.
 * @returns {string} Lowercased text with words separated by single spaces.
 */
function normalizeText(value) {
  const composed = cleanText(value).normalize('NFKC');
  const lowered = composed.toLowerCase();
  const separated = lowered.replace(/[\p{P}\p{S}\s]+/gu, ' ');
  const collapsed = separated.replace(/\s+/g, ' ');
  return collapsed.trim();
}
/**
 * Build a compact key from a value by removing every run of punctuation,
 * symbols and whitespace after NFKC normalization. Letter case is preserved.
 *
 * @param {*} value - Value to convert; non-strings yield ''.
 * @returns {string} The text with all separators stripped out.
 */
function toKeyText(value) {
  const composed = cleanText(value).normalize('NFKC');
  const stripped = composed.replace(/[\p{P}\p{S}\s]+/gu, '');
  return stripped.trim();
}
/**
 * Pick the descriptive text of an exclusion entry.
 *
 * Fields are tried in priority order and the first one that is a non-empty
 * string after cleaning wins.
 *
 * @param {object|null|undefined} exclusion - Raw exclusion entry.
 * @returns {string} The first non-empty cleaned field, or '' if none is set.
 */
function getExclusionText(exclusion) {
  const fieldsByPriority = ['original_finding', 'title', 'suggestion', 'reason', 'note'];
  for (const field of fieldsByPriority) {
    const candidate = cleanText(exclusion?.[field]);
    if (candidate) {
      return candidate;
    }
  }
  return '';
}
/**
 * Turn a raw exclusion entry into its normalized form.
 *
 * The returned object keeps every property of the input and adds or
 * overwrites: location (cleaned, or null), filePath (the part of location
 * before the first ':'), role (cleaned, or null), text, textKey and
 * fingerprint ("path|role|textKey", with '*' standing in for a missing path
 * or role and "entry-<position>" standing in for a missing text key).
 *
 * @param {object|null|undefined} exclusion - Raw exclusion entry.
 * @param {number} index - Zero-based position of the entry in its list.
 * @returns {object} A new normalized entry.
 */
function normalizeExclusionEntry(exclusion, index) {
  const location = cleanText(exclusion?.location);
  const role = cleanText(exclusion?.role);
  const text = getExclusionText(exclusion);
  const textKey = toKeyText(text);

  // Everything before the first ':' of the location is treated as the path.
  let filePath = '';
  if (location) {
    [filePath] = location.split(':');
  }

  const pathPart = filePath || '*';
  const rolePart = role || '*';
  // Entries without any text get a position-based placeholder.
  const textPart = textKey || `entry-${index + 1}`;
  const fingerprint = `${pathPart}|${rolePart}|${textPart}`;

  return {
    ...exclusion,
    location: location || null,
    filePath,
    role: role || null,
    text,
    textKey,
    fingerprint,
  };
}
/**
 * Drop entries whose fingerprint has already been seen, keeping the first
 * occurrence and the original order.
 *
 * @param {Array<{fingerprint: string}>} exclusions - Entries to filter.
 * @returns {Array} A new array holding one entry per distinct fingerprint.
 */
function dedupeExclusions(exclusions) {
  const knownFingerprints = new Set();
  return exclusions.reduce((kept, entry) => {
    const { fingerprint } = entry;
    if (!knownFingerprints.has(fingerprint)) {
      knownFingerprints.add(fingerprint);
      kept.push(entry);
    }
    return kept;
  }, []);
}
/**
 * Group normalized exclusion entries that share the same text key.
 *
 * Entries are bucketed by textKey (falling back to fingerprint). Each bucket
 * counts its members, collects the distinct file paths and roles, and keeps
 * up to two sample texts. Buckets are ordered by member count (descending),
 * then by number of distinct paths (descending), then by text.
 *
 * @param {Iterable<object>} exclusions - Normalized exclusion entries.
 * @returns {Array<{text: string, count: number, paths: string[], roles: string[], samples: string[]}>}
 *   One summary per bucket, with paths and roles sorted.
 */
function groupExclusionsForAI(exclusions) {
  const buckets = new Map();

  for (const entry of exclusions) {
    const bucketKey = entry.textKey || entry.fingerprint;

    let bucket = buckets.get(bucketKey);
    if (bucket === undefined) {
      bucket = {
        key: bucketKey,
        text: entry.text || entry.location || entry.fingerprint,
        count: 0,
        paths: new Set(),
        roles: new Set(),
        samples: [],
      };
      buckets.set(bucketKey, bucket);
    }

    bucket.count += 1;
    if (entry.filePath) {
      bucket.paths.add(entry.filePath);
    }
    if (entry.role) {
      bucket.roles.add(entry.role);
    }
    // Keep at most two sample texts per bucket.
    if (bucket.samples.length < 2 && entry.text) {
      bucket.samples.push(entry.text);
    }
  }

  const byPriority = (left, right) => {
    if (left.count !== right.count) {
      return right.count - left.count;
    }
    if (left.paths.size !== right.paths.size) {
      return right.paths.size - left.paths.size;
    }
    return left.text.localeCompare(right.text);
  };

  const ordered = Array.from(buckets.values());
  ordered.sort(byPriority);

  return ordered.map(({ text, count, paths, roles, samples }) => ({
    text,
    count,
    paths: Array.from(paths).sort(),
    roles: Array.from(roles).sort(),
    samples,
  }));
}
/**
 * Summarize a list of exclusions into counts, top groups and a prompt text.
 *
 * The entries are normalized, de-duplicated by fingerprint and grouped; only
 * the largest groups are kept, each with a truncated list of paths, roles
 * and samples. The prompt is a header line with the raw/unique/group counts,
 * one line per kept group, and a trailing note when groups were omitted.
 *
 * The result always has the same shape, including for empty or non-array
 * input, so callers can read `groupCount` unconditionally.
 *
 * @param {Array<object>} exclusions - Raw or already-normalized exclusions.
 * @returns {{rawCount: number, uniqueCount: number, groupCount: number, groups: Array<object>, prompt: string}}
 *   Summary; `prompt` is '' when there is nothing to exclude.
 */
function buildExclusionContext(exclusions) {
  // Limits that keep the generated prompt small.
  const maxGroups = 12;
  const maxPathsPerGroup = 4;
  const maxRolesPerGroup = 3;
  const maxSamplesPerGroup = 2;

  if (!Array.isArray(exclusions) || exclusions.length === 0) {
    return {
      rawCount: 0,
      uniqueCount: 0,
      // Present here too, so the empty result matches the non-empty shape.
      groupCount: 0,
      groups: [],
      prompt: '',
    };
  }

  const normalized = exclusions.map((exclusion, index) => normalizeExclusionEntry(exclusion, index));
  const unique = dedupeExclusions(normalized);
  const groups = groupExclusionsForAI(unique);
  const topGroups = groups.slice(0, maxGroups).map(group => ({
    text: group.text,
    count: group.count,
    paths: group.paths.slice(0, maxPathsPerGroup),
    roles: group.roles.slice(0, maxRolesPerGroup),
    samples: group.samples.slice(0, maxSamplesPerGroup),
  }));
  const omitted = groups.length - topGroups.length;

  const promptLines = [
    // Header: raw entry count, count after de-duplication, number of groups.
    `已知誤報清單(原始 ${exclusions.length} 筆,整理後 ${unique.length} 筆,分成 ${groups.length} 類):`,
    ...topGroups.map((group, index) => {
      const parts = [
        `${index + 1}. ${group.text}`,
        `count=${group.count}`,
      ];
      if (group.paths.length > 0) parts.push(`paths=${group.paths.join(', ')}`);
      if (group.roles.length > 0) parts.push(`roles=${group.roles.join(', ')}`);
      if (group.samples.length > 0) parts.push(`samples=${group.samples.join(' | ')}`);
      return `- ${parts.join(' ; ')}`;
    }),
  ];
  if (omitted > 0) {
    // Tell the reader how many groups were left out of the prompt.
    promptLines.push(`- 另有 ${omitted} 類相似排除條目未展開,請依上述群組規則推論。`);
  }

  return {
    rawCount: exclusions.length,
    uniqueCount: unique.length,
    groupCount: groups.length,
    groups: topGroups,
    prompt: promptLines.join('\n'),
  };
}
/**
* 讀取舊 findings(從來源分支的 cloned repoDir 中的 FINDINGS_PATH)
*/
@@ -145,7 +279,7 @@ export function loadExclusions(workspace, repoState = null) {
const stat = fs.statSync(fullPath);
const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
rawCount = Array.isArray(data) ? data.length : Array.isArray(data?.excluded_findings) ? data.excluded_findings.length : 0;
exclusions = normalizeExclusions(data);
exclusions = dedupeExclusions(normalizeExclusions(data).map((exclusion, index) => normalizeExclusionEntry(exclusion, index)));
const branch = repoState?.branch || 'detached';
const shortSha = repoState?.shortSha || repoState?.headSha || 'unknown';
const commitTime = repoState?.commitTime || 'unknown';
@@ -156,7 +290,8 @@ export function loadExclusions(workspace, repoState = null) {
warn(`讀取排除問題失敗: ${e.message},視為空: ${fullPath}`);
exclusions = [];
}
ok(`讀取排除問題: raw=${rawCount} normalized=${exclusions.length}`);
const summary = buildExclusionContext(exclusions);
ok(`讀取排除問題: raw=${rawCount} normalized=${exclusions.length} groups=${summary.groupCount}`);
return exclusions;
}
@@ -169,8 +304,13 @@ export function applyExclusions(findings, exclusions) {
const before = findings.length;
const filtered = findings.filter(f => !exclusions.some(ex => {
const fPath = String(f.location).split(':')[0];
const exPath = ex.location ? String(ex.location).split(':')[0] : null;
return (!exPath || fPath === exPath) && (!ex.role || ex.role === f.role);
const exPath = ex.filePath || (ex.location ? String(ex.location).split(':')[0] : null);
const findingText = normalizeText(f.suggestion || f.title || '');
const exclusionText = ex.textKey || normalizeText(ex.text || ex.suggestion || ex.title || '');
const locationMatches = (!exPath || fPath === exPath);
const roleMatches = (!ex.role || ex.role === f.role);
const textMatches = !exclusionText || !findingText || findingText.includes(exclusionText) || exclusionText.includes(findingText);
return locationMatches && roleMatches && (exPath || ex.role ? true : textMatches);
}));
ok(`排除過濾: ${before} -> ${filtered.length} 筆(排除 ${before - filtered.length} 筆)`);
return filtered;
@@ -179,17 +319,18 @@ export function applyExclusions(findings, exclusions) {
/**
* 呼叫 AI 判斷哪些問題是誤報或不需處理,失敗時降級回傳原始 findings
*/
export async function filterFalsePositivesWithAI(findings, exclusions = []) {
export async function filterFalsePositivesWithAI(findings, exclusions = [], chatFn = chatJSON) {
if (findings.length === 0) return findings;
const exclusionHint = exclusions.length > 0
? `\n已知誤報(相同路徑且語意相近者一併排除):\n${JSON.stringify(exclusions.map(({ location, suggestion }) => ({ location, suggestion })))}`
const exclusionContext = buildExclusionContext(exclusions);
const exclusionHint = exclusionContext.prompt
? `\n${exclusionContext.prompt}\n規則:若 finding 與上述任何一類的路徑、角色或描述高度相似,優先視為誤報或不適用。`
: '';
const systemPrompt = `判斷以下程式碼審查問題是否為誤報或不適用(如已正確使用 secrets、CI/CD 必要權限等),移除後只回傳需保留的 JSON 陣列。${exclusionHint}`;
try {
const result = await chatJSON(systemPrompt, JSON.stringify(toAIPayload(findings)));
const result = await chatFn(systemPrompt, JSON.stringify(toAIPayload(findings)));
if (Array.isArray(result) && result.length > 0) {
ok(`AI 誤報過濾: ${findings.length} -> ${result.length}`);
const origMap = new Map(findings.map(f => [`${f.location}|${String(f.suggestion).slice(0, 50)}`, f]));