feat: optimize exclusion filtering
This commit is contained in:
@@ -21,8 +21,8 @@
|
||||
- 已驗收:log 已出現 `AI 去重: 13 -> 11 筆`,且程式具備失敗時保留所有問題的降級處理。
|
||||
|
||||
## 階段五:AI 排除問題過濾
|
||||
- 目標:讀取排除問題檔案(`.gitea/ai-review/exclusions.json`)進行規則過濾,並呼叫 AI 判斷剩餘問題是否為誤報或不適用,兩層過濾後產生最終問題清單。
|
||||
- 驗收:log 中能看到排除問題檔案讀取成功或不存在的訊息、規則過濾數量變化,以及「AI 誤報過濾: N -> M 筆」或降級訊息。
|
||||
- 目標:讀取排除問題檔案(`.gitea/ai-review/exclusions.json`)時先去除重複條目、整理成語意群組摘要,再進行規則過濾並呼叫 AI 判斷剩餘問題是否為誤報或不適用,兩層過濾後產生最終問題清單。
|
||||
- 驗收:log 中能看到排除問題檔案讀取成功或不存在的訊息、重複排除條目的整理摘要、規則過濾數量變化,以及「AI 誤報過濾: N -> M 筆」或降級訊息。
|
||||
- 部分驗收:log 已顯示 `讀取排除問題: 50 筆` 與 `排除過濾: 11 -> 0 筆`,但這次未進入 `AI 誤報過濾: N -> M 筆` 的正向路徑。
|
||||
- 可驗收紀錄情境:當 `排除過濾` 後仍保留 1 筆以上 findings 時,log 會出現 `AI 誤報過濾: N -> M 筆`;若 API 額度不足或回傳失敗,則會出現 `AI 誤報過濾失敗(...),降級:保留所有問題`。
|
||||
|
||||
|
||||
+149
-8
@@ -46,6 +46,140 @@ function formatFileTime(mtimeMs) {
|
||||
return new Date(mtimeMs).toISOString();
|
||||
}
|
||||
|
||||
/**
 * Return a trimmed copy of a string value.
 *
 * @param {unknown} value - Candidate value; anything that is not a string is rejected.
 * @returns {string} The trimmed string, or '' when `value` is not a string.
 */
function cleanText(value) {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
|
||||
|
||||
/**
 * Produce a comparison-friendly form of a text value: NFKC-normalized,
 * lower-cased, with every run of punctuation, symbols and whitespace
 * replaced by a single space and no leading/trailing space.
 *
 * @param {unknown} value - Raw value; non-strings are treated as empty text by cleanText.
 * @returns {string} The normalized text ('' for non-string or blank input).
 */
function normalizeText(value) {
  const folded = cleanText(value).normalize('NFKC').toLowerCase();
  // Punctuation (\p{P}), symbols (\p{S}) and whitespace all act as word separators.
  const separated = folded.replace(/[\p{P}\p{S}\s]+/gu, ' ');
  const collapsed = separated.replace(/\s+/g, ' ');
  return collapsed.trim();
}
|
||||
|
||||
/**
 * Build a compact key from a text value: NFKC-normalized with all
 * punctuation, symbols and whitespace removed. Letter case is preserved.
 *
 * @param {unknown} value - Raw value; non-strings are treated as empty text by cleanText.
 * @returns {string} The key text ('' for non-string or blank input).
 */
function toKeyText(value) {
  const canonical = cleanText(value).normalize('NFKC');
  // Strip every separator character so formatting differences do not change the key.
  const stripped = canonical.replace(/[\p{P}\p{S}\s]+/gu, '');
  return stripped.trim();
}
|
||||
|
||||
/**
 * Pick the descriptive text of an exclusion entry.
 *
 * Fields are checked in priority order (original_finding, title, suggestion,
 * reason, note) and the first one that is a non-blank string wins.
 *
 * @param {object|null|undefined} exclusion - Exclusion entry; may be nullish.
 * @returns {string} The trimmed text of the first usable field, or '' when none is usable.
 */
function getExclusionText(exclusion) {
  const fieldsByPriority = ['original_finding', 'title', 'suggestion', 'reason', 'note'];
  for (const field of fieldsByPriority) {
    const candidate = cleanText(exclusion?.[field]);
    if (candidate) {
      return candidate;
    }
  }
  return '';
}
|
||||
|
||||
/**
 * Augment an exclusion entry with normalized fields used for de-duplication
 * and matching.
 *
 * The result keeps all original properties and sets/overrides:
 * `location` (trimmed or null), `filePath` (the part of `location` before the
 * first ':' or ''), `role` (trimmed or null), `text`, `textKey` and
 * `fingerprint` ("filePath|role|textKey", with '*' standing in for a missing
 * path or role).
 *
 * @param {object|null|undefined} exclusion - Raw exclusion entry.
 * @param {number} index - Zero-based position of the entry in its list; used to
 *   build a per-entry fingerprint segment when the entry has no text.
 * @returns {object} A new object with the normalized fields added.
 */
function normalizeExclusionEntry(exclusion, index) {
  const location = cleanText(exclusion?.location);
  const role = cleanText(exclusion?.role);
  const text = getExclusionText(exclusion);
  const textKey = toKeyText(text);

  let filePath = '';
  if (location) {
    [filePath] = location.split(':');
  }

  // Entries without any text get an index-based segment, so they are never
  // merged with one another by fingerprint.
  const fingerprintParts = [
    filePath || '*',
    role || '*',
    textKey || `entry-${index + 1}`,
  ];

  return {
    ...exclusion,
    location: location || null,
    filePath,
    role: role || null,
    text,
    textKey,
    fingerprint: fingerprintParts.join('|'),
  };
}
|
||||
|
||||
/**
 * Remove exclusion entries whose `fingerprint` has already been seen,
 * keeping the first occurrence and the original order.
 *
 * @param {Array<{fingerprint: string}>} exclusions - Normalized exclusion entries.
 * @returns {Array<object>} A new array containing only the first entry per fingerprint.
 */
function dedupeExclusions(exclusions) {
  const knownFingerprints = new Set();
  return exclusions.reduce((kept, entry) => {
    const { fingerprint } = entry;
    if (!knownFingerprints.has(fingerprint)) {
      knownFingerprints.add(fingerprint);
      kept.push(entry);
    }
    return kept;
  }, []);
}
|
||||
|
||||
/**
 * Group normalized exclusion entries that share the same `textKey` (falling
 * back to `fingerprint` when the key is empty) and summarize each group.
 *
 * Groups are ordered by descending entry count, then by descending number of
 * distinct file paths, then by `text` using localeCompare.
 *
 * @param {Array<object>} exclusions - Entries produced by normalizeExclusionEntry.
 * @returns {Array<{text: string, count: number, paths: string[], roles: string[], samples: string[]}>}
 *   One summary per group; `paths` and `roles` are sorted, `samples` holds at
 *   most the first two non-empty texts seen in the group.
 */
function groupExclusionsForAI(exclusions) {
  const buckets = new Map();

  for (const entry of exclusions) {
    const { textKey, fingerprint, text, location, filePath, role } = entry;
    const bucketKey = textKey || fingerprint;

    let bucket = buckets.get(bucketKey);
    if (bucket === undefined) {
      // The first entry of a group decides the group's display text.
      bucket = {
        key: bucketKey,
        text: text || location || fingerprint,
        count: 0,
        paths: new Set(),
        roles: new Set(),
        samples: [],
      };
      buckets.set(bucketKey, bucket);
    }

    bucket.count += 1;
    if (filePath) {
      bucket.paths.add(filePath);
    }
    if (role) {
      bucket.roles.add(role);
    }
    if (text && bucket.samples.length < 2) {
      bucket.samples.push(text);
    }
  }

  const ranked = Array.from(buckets.values());
  ranked.sort((left, right) => {
    const byCount = right.count - left.count;
    if (byCount !== 0) {
      return byCount;
    }
    const byPathSpread = right.paths.size - left.paths.size;
    if (byPathSpread !== 0) {
      return byPathSpread;
    }
    return left.text.localeCompare(right.text);
  });

  return ranked.map(({ text, count, paths, roles, samples }) => ({
    text,
    count,
    paths: Array.from(paths).sort(),
    roles: Array.from(roles).sort(),
    samples,
  }));
}
|
||||
|
||||
/**
 * Build a compact summary of the exclusion list for the AI false-positive prompt.
 *
 * Entries are normalized, de-duplicated by fingerprint and grouped by text;
 * only the largest groups are expanded in the prompt, and any remaining
 * groups are mentioned as a single "omitted" line.
 *
 * @param {Array<object>} exclusions - Raw or already-normalized exclusion entries.
 *   A missing or non-array value is treated as an empty list.
 * @returns {{rawCount: number, uniqueCount: number, groupCount: number, groups: Array<object>, prompt: string}}
 *   `rawCount` is the input length, `uniqueCount` the length after
 *   de-duplication, `groupCount` the total number of groups, `groups` the
 *   truncated top groups and `prompt` the text to embed in the system prompt
 *   ('' when there are no exclusions).
 */
function buildExclusionContext(exclusions) {
  // Limits that keep the prompt small regardless of how large the exclusion file grows.
  const maxGroups = 12;
  const maxPathsPerGroup = 4;
  const maxRolesPerGroup = 3;
  const maxSamplesPerGroup = 2;

  // The empty result must have the same shape as the populated one: callers
  // read `groupCount` (e.g. for logging) and would otherwise see `undefined`.
  if (!Array.isArray(exclusions) || exclusions.length === 0) {
    return {
      rawCount: 0,
      uniqueCount: 0,
      groupCount: 0,
      groups: [],
      prompt: '',
    };
  }

  const normalized = exclusions.map((exclusion, index) => normalizeExclusionEntry(exclusion, index));
  const unique = dedupeExclusions(normalized);
  const groups = groupExclusionsForAI(unique);

  const topGroups = groups.slice(0, maxGroups).map(group => ({
    text: group.text,
    count: group.count,
    paths: group.paths.slice(0, maxPathsPerGroup),
    roles: group.roles.slice(0, maxRolesPerGroup),
    samples: group.samples.slice(0, maxSamplesPerGroup),
  }));
  const omitted = groups.length - topGroups.length;

  const promptLines = [
    `已知誤報清單(原始 ${exclusions.length} 筆,整理後 ${unique.length} 筆,分成 ${groups.length} 類):`,
    ...topGroups.map((group, index) => {
      const parts = [
        `${index + 1}. ${group.text}`,
        `count=${group.count}`,
      ];
      // Optional sections are only emitted when they carry information.
      if (group.paths.length > 0) parts.push(`paths=${group.paths.join(', ')}`);
      if (group.roles.length > 0) parts.push(`roles=${group.roles.join(', ')}`);
      if (group.samples.length > 0) parts.push(`samples=${group.samples.join(' | ')}`);
      return `- ${parts.join(' ; ')}`;
    }),
  ];
  if (omitted > 0) {
    promptLines.push(`- 另有 ${omitted} 類相似排除條目未展開,請依上述群組規則推論。`);
  }

  return {
    rawCount: exclusions.length,
    uniqueCount: unique.length,
    groupCount: groups.length,
    groups: topGroups,
    prompt: promptLines.join('\n'),
  };
}
|
||||
|
||||
/**
|
||||
* 讀取舊 findings(從來源分支的 cloned repoDir 中的 FINDINGS_PATH)
|
||||
*/
|
||||
@@ -145,7 +279,7 @@ export function loadExclusions(workspace, repoState = null) {
|
||||
const stat = fs.statSync(fullPath);
|
||||
const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
|
||||
rawCount = Array.isArray(data) ? data.length : Array.isArray(data?.excluded_findings) ? data.excluded_findings.length : 0;
|
||||
exclusions = normalizeExclusions(data);
|
||||
exclusions = dedupeExclusions(normalizeExclusions(data).map((exclusion, index) => normalizeExclusionEntry(exclusion, index)));
|
||||
const branch = repoState?.branch || 'detached';
|
||||
const shortSha = repoState?.shortSha || repoState?.headSha || 'unknown';
|
||||
const commitTime = repoState?.commitTime || 'unknown';
|
||||
@@ -156,7 +290,8 @@ export function loadExclusions(workspace, repoState = null) {
|
||||
warn(`讀取排除問題失敗: ${e.message},視為空: ${fullPath}`);
|
||||
exclusions = [];
|
||||
}
|
||||
ok(`讀取排除問題: raw=${rawCount} normalized=${exclusions.length} 筆`);
|
||||
const summary = buildExclusionContext(exclusions);
|
||||
ok(`讀取排除問題: raw=${rawCount} normalized=${exclusions.length} groups=${summary.groupCount} 筆`);
|
||||
return exclusions;
|
||||
}
|
||||
|
||||
@@ -169,8 +304,13 @@ export function applyExclusions(findings, exclusions) {
|
||||
const before = findings.length;
|
||||
const filtered = findings.filter(f => !exclusions.some(ex => {
|
||||
const fPath = String(f.location).split(':')[0];
|
||||
const exPath = ex.location ? String(ex.location).split(':')[0] : null;
|
||||
return (!exPath || fPath === exPath) && (!ex.role || ex.role === f.role);
|
||||
const exPath = ex.filePath || (ex.location ? String(ex.location).split(':')[0] : null);
|
||||
const findingText = normalizeText(f.suggestion || f.title || '');
|
||||
const exclusionText = ex.textKey || normalizeText(ex.text || ex.suggestion || ex.title || '');
|
||||
const locationMatches = (!exPath || fPath === exPath);
|
||||
const roleMatches = (!ex.role || ex.role === f.role);
|
||||
const textMatches = !exclusionText || !findingText || findingText.includes(exclusionText) || exclusionText.includes(findingText);
|
||||
return locationMatches && roleMatches && (exPath || ex.role ? true : textMatches);
|
||||
}));
|
||||
ok(`排除過濾: ${before} -> ${filtered.length} 筆(排除 ${before - filtered.length} 筆)`);
|
||||
return filtered;
|
||||
@@ -179,17 +319,18 @@ export function applyExclusions(findings, exclusions) {
|
||||
/**
|
||||
* 呼叫 AI 判斷哪些問題是誤報或不需處理,失敗時降級回傳原始 findings
|
||||
*/
|
||||
export async function filterFalsePositivesWithAI(findings, exclusions = []) {
|
||||
export async function filterFalsePositivesWithAI(findings, exclusions = [], chatFn = chatJSON) {
|
||||
if (findings.length === 0) return findings;
|
||||
|
||||
const exclusionHint = exclusions.length > 0
|
||||
? `\n已知誤報(相同路徑且語意相近者一併排除):\n${JSON.stringify(exclusions.map(({ location, suggestion }) => ({ location, suggestion })))}`
|
||||
const exclusionContext = buildExclusionContext(exclusions);
|
||||
const exclusionHint = exclusionContext.prompt
|
||||
? `\n${exclusionContext.prompt}\n規則:若 finding 與上述任何一類的路徑、角色或描述高度相似,優先視為誤報或不適用。`
|
||||
: '';
|
||||
|
||||
const systemPrompt = `判斷以下程式碼審查問題是否為誤報或不適用(如已正確使用 secrets、CI/CD 必要權限等),移除後只回傳需保留的 JSON 陣列。${exclusionHint}`;
|
||||
|
||||
try {
|
||||
const result = await chatJSON(systemPrompt, JSON.stringify(toAIPayload(findings)));
|
||||
const result = await chatFn(systemPrompt, JSON.stringify(toAIPayload(findings)));
|
||||
if (Array.isArray(result) && result.length > 0) {
|
||||
ok(`AI 誤報過濾: ${findings.length} -> ${result.length} 筆`);
|
||||
const origMap = new Map(findings.map(f => [`${f.location}|${String(f.suggestion).slice(0, 50)}`, f]));
|
||||
|
||||
+45
-1
@@ -3,7 +3,7 @@ import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { loadOldFindings, loadExclusions, applyExclusions } from './findings.js';
|
||||
import { loadOldFindings, loadExclusions, applyExclusions, filterFalsePositivesWithAI } from './findings.js';
|
||||
import { EXCLUSIONS_PATH, FINDINGS_PATH } from './config.js';
|
||||
|
||||
describe('findings exclusions', () => {
|
||||
@@ -56,6 +56,50 @@ describe('findings exclusions', () => {
|
||||
assert.equal(filtered[0].location, 'README.md:12');
|
||||
});
|
||||
|
||||
it('dedupes repeated exclusions when loading exclusions', () => {
|
||||
const fullPath = path.join(workspace, EXCLUSIONS_PATH);
|
||||
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
|
||||
fs.writeFileSync(fullPath, JSON.stringify([
|
||||
{ location: 'entrypoint.sh:180', title: 'fetch_package_versions jq overhead' },
|
||||
{ location: 'entrypoint.sh:999', title: 'fetch_package_versions jq overhead' },
|
||||
{ location: 'entrypoint.sh:180', title: 'fetch_package_versions jq overhead' },
|
||||
], null, 2));
|
||||
|
||||
const exclusions = loadExclusions(workspace);
|
||||
|
||||
assert.equal(exclusions.length, 1);
|
||||
assert.equal(exclusions[0].filePath, 'entrypoint.sh');
|
||||
assert.equal(exclusions[0].text, 'fetch_package_versions jq overhead');
|
||||
});
|
||||
|
||||
it('builds a compact exclusion hint for AI', async () => {
|
||||
const findings = [
|
||||
{ level: 'warning', role: 'Maya', location: 'src/app.cs:12', suggestion: 'update tests' },
|
||||
];
|
||||
const exclusions = [
|
||||
{ location: 'src/app.cs:1', original_finding: '更新套件後請補上測試驗證' },
|
||||
{ location: 'src/app.cs:99', original_finding: '更新套件後請補上測試驗證 ' },
|
||||
{ location: 'src/service.cs:3', original_finding: '更新套件後請補上測試驗證' },
|
||||
{ location: 'src/service.cs:8', title: '請確認安全性變更' },
|
||||
];
|
||||
|
||||
let capturedSystemPrompt = '';
|
||||
let capturedUserContent = '';
|
||||
const result = await filterFalsePositivesWithAI(findings, exclusions, async (systemPrompt, userContent) => {
|
||||
capturedSystemPrompt = systemPrompt;
|
||||
capturedUserContent = userContent;
|
||||
return findings;
|
||||
});
|
||||
|
||||
assert.equal(result.length, 1);
|
||||
assert.ok(capturedSystemPrompt.includes('已知誤報清單(原始 4 筆,整理後 3 筆,分成 2 類)'));
|
||||
assert.ok(capturedSystemPrompt.includes('更新套件後請補上測試驗證'));
|
||||
assert.ok(capturedSystemPrompt.includes('paths=src/app.cs, src/service.cs'));
|
||||
assert.ok(capturedSystemPrompt.includes('請確認安全性變更'));
|
||||
assert.ok(capturedUserContent.includes('"location":"src/app.cs:12"'));
|
||||
assert.ok(capturedUserContent.includes('"suggestion":"update tests"'));
|
||||
});
|
||||
|
||||
it('logs exclusions file metadata and repo state when loading exclusions', () => {
|
||||
const fullPath = path.join(workspace, EXCLUSIONS_PATH);
|
||||
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
|
||||
|
||||
Reference in New Issue
Block a user