Files
code-review/app/findings.js
T

344 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import fs from 'fs';
import path from 'path';
import { chatJSON } from './llm.js';
import { FINDINGS_PATH, EXCLUSIONS_PATH } from './config.js';
import { line, ok, warn } from './log.js';
// Severity levels listed from most to least severe; sortByLevel uses the index in this array as the sort rank.
const LEVELS = ['critical', 'warning', 'info'];
/**
 * Analyze a diff with a single reviewer role and return that role's findings.
 *
 * The role's `system_prompt` is sent to the LLM together with the diff. Only
 * reply entries that carry all of `level`, `role`, `location` and `suggestion`
 * are kept, and each kept entry is tagged with `is_new: true`.
 *
 * @param {{name: string, system_prompt: string}} role - Reviewer role definition.
 * @param {string} diff - Git diff text to review.
 * @returns {Promise<object[]>} Valid findings; empty when the reply is not an array.
 */
export async function analyzeWithRole(role, diff) {
  line(`[${role.name}] 開始分析`);
  const findings = await chatJSON(role.system_prompt, `以下是 Git Diff 內容:\n\n${diff}`);
  // A non-array reply (object, string, null) cannot be filtered; treat it as "no findings"
  // instead of crashing on `.filter`.
  if (!Array.isArray(findings)) {
    warn(`[${role.name}] AI 回傳非陣列,視為沒有問題`);
    return [];
  }
  const valid = findings
    // `f &&` guards against null / undefined entries inside the reply array.
    .filter(f => f && f.level && f.role && f.location && f.suggestion)
    .map(f => ({ ...f, is_new: true }));
  ok(`[${role.name}] 找到 ${valid.length} 個問題`);
  return valid;
}
/**
 * Read a JSON file that is expected to contain an array.
 *
 * @param {string} fullPath - Path of the file to read.
 * @param {string} label - Label used in warning messages.
 * @returns {Array} The parsed array, or an empty array when the file is missing,
 *   cannot be read or parsed, or does not contain an array.
 */
function readJSONArray(fullPath, label) {
  const exists = fs.existsSync(fullPath);
  if (!exists) {
    warn(`${label}檔案不存在,視為空`);
    return [];
  }

  let parsed;
  try {
    const raw = fs.readFileSync(fullPath, 'utf8');
    parsed = JSON.parse(raw);
  } catch (err) {
    warn(`讀取${label}失敗: ${err.message},視為空`);
    return [];
  }

  if (Array.isArray(parsed)) {
    return parsed;
  }
  return [];
}
/**
 * Extract the list of exclusion entries from parsed exclusion-file content.
 *
 * Accepts either a bare array or an object whose `excluded_findings` property
 * is an array; anything else yields an empty array.
 *
 * @param {*} data - Parsed JSON content.
 * @returns {Array} The exclusion entries.
 */
function normalizeExclusions(data) {
  if (Array.isArray(data)) {
    return data;
  }
  const nested = data?.excluded_findings;
  return Array.isArray(nested) ? nested : [];
}
/**
 * Format a modification time given in milliseconds as an ISO-8601 string.
 *
 * @param {number} mtimeMs - Timestamp in milliseconds.
 * @returns {string} ISO string, or 'unknown' when the value is not a finite number.
 */
function formatFileTime(mtimeMs) {
  return Number.isFinite(mtimeMs) ? new Date(mtimeMs).toISOString() : 'unknown';
}
/**
 * Return the trimmed string, or an empty string for any non-string value.
 *
 * @param {*} value - Value to clean.
 * @returns {string} Trimmed text, or ''.
 */
function cleanText(value) {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}
/**
 * Normalize text for loose comparison: NFKC-normalize, lowercase, and turn
 * every run of punctuation, symbols and whitespace into a single space.
 *
 * @param {*} value - Value to normalize; non-strings become ''.
 * @returns {string} Normalized text without leading or trailing spaces.
 */
function normalizeText(value) {
  const folded = cleanText(value).normalize('NFKC').toLowerCase();
  const spaced = folded.replace(/[\p{P}\p{S}\s]+/gu, ' ');
  return spaced.replace(/\s+/g, ' ').trim();
}
/**
 * Build a compact comparison key from text: NFKC-normalize and remove all
 * punctuation, symbols and whitespace. Letter case is left unchanged.
 *
 * @param {*} value - Value to convert; non-strings become ''.
 * @returns {string} Key text.
 */
function toKeyText(value) {
  const canonical = cleanText(value).normalize('NFKC');
  const stripped = canonical.replace(/[\p{P}\p{S}\s]+/gu, '');
  return stripped.trim();
}
/**
 * Pick the descriptive text of an exclusion entry.
 *
 * Fields are tried in this order: `original_finding`, `title`, `suggestion`,
 * `reason`, `note`; the first one that is a non-blank string wins.
 *
 * @param {object|null|undefined} exclusion - Raw exclusion entry.
 * @returns {string} The trimmed text, or '' when no field has usable text.
 */
function getExclusionText(exclusion) {
  const fieldsByPriority = ['original_finding', 'title', 'suggestion', 'reason', 'note'];
  for (const field of fieldsByPriority) {
    const text = cleanText(exclusion?.[field]);
    if (text) {
      return text;
    }
  }
  return '';
}
/**
 * Turn a raw exclusion entry into a normalized record.
 *
 * The result keeps every original property and adds or overrides:
 * `location` (trimmed, or null), `filePath` (the part of location before the
 * first ':'), `role` (trimmed, or null), `text`, `textKey` and `fingerprint`.
 * The fingerprint is `path|role|textKey`, with '*' standing in for a missing
 * path or role and `entry-<index + 1>` standing in for a missing text key.
 *
 * @param {object|null|undefined} exclusion - Raw exclusion entry.
 * @param {number} index - Zero-based position of the entry in its list.
 * @returns {object} Normalized exclusion record.
 */
function normalizeExclusionEntry(exclusion, index) {
  const location = cleanText(exclusion?.location);
  const role = cleanText(exclusion?.role);
  const text = getExclusionText(exclusion);
  const textKey = toKeyText(text);

  // Splitting an empty string yields [''], so an empty location gives an empty path.
  const [filePath] = location.split(':');

  const pathPart = filePath || '*';
  const rolePart = role || '*';
  const textPart = textKey || `entry-${index + 1}`;

  return {
    ...exclusion,
    location: location || null,
    filePath,
    role: role || null,
    text,
    textKey,
    fingerprint: `${pathPart}|${rolePart}|${textPart}`,
  };
}
/**
 * Drop exclusions whose `fingerprint` has already been seen, keeping the first
 * occurrence of each fingerprint and the original order.
 *
 * @param {object[]} exclusions - Normalized exclusion records.
 * @returns {object[]} Records with unique fingerprints.
 */
function dedupeExclusions(exclusions) {
  const firstByFingerprint = new Map();
  for (const entry of exclusions) {
    if (!firstByFingerprint.has(entry.fingerprint)) {
      firstByFingerprint.set(entry.fingerprint, entry);
    }
  }
  return [...firstByFingerprint.values()];
}
/**
 * Group normalized exclusions for inclusion in an AI prompt.
 *
 * Entries are grouped by `textKey` (or by `fingerprint` when the key is empty).
 * Each group records how many entries it holds, the distinct file paths and
 * roles seen, and up to two sample texts. Groups are ordered by entry count
 * (descending), then by number of distinct paths (descending), then by text.
 *
 * @param {object[]} exclusions - Normalized exclusion records.
 * @returns {{text: string, count: number, paths: string[], roles: string[], samples: string[]}[]}
 *   Sorted groups with paths and roles as sorted arrays.
 */
function groupExclusionsForAI(exclusions) {
  const buckets = new Map();

  for (const entry of exclusions) {
    const bucketKey = entry.textKey || entry.fingerprint;
    let bucket = buckets.get(bucketKey);
    if (bucket === undefined) {
      bucket = {
        text: entry.text || entry.location || entry.fingerprint,
        count: 0,
        paths: new Set(),
        roles: new Set(),
        samples: [],
      };
      buckets.set(bucketKey, bucket);
    }

    bucket.count += 1;
    if (entry.filePath) {
      bucket.paths.add(entry.filePath);
    }
    if (entry.role) {
      bucket.roles.add(entry.role);
    }
    if (entry.text && bucket.samples.length < 2) {
      bucket.samples.push(entry.text);
    }
  }

  const byWeight = (a, b) => {
    if (a.count !== b.count) {
      return b.count - a.count;
    }
    if (a.paths.size !== b.paths.size) {
      return b.paths.size - a.paths.size;
    }
    return a.text.localeCompare(b.text);
  };

  return [...buckets.values()]
    .sort(byWeight)
    .map(({ text, count, paths, roles, samples }) => ({
      text,
      count,
      paths: [...paths].sort(),
      roles: [...roles].sort(),
      samples,
    }));
}
/**
 * Summarize exclusions into a compact context for an AI prompt.
 *
 * Entries are normalized, de-duplicated by fingerprint and grouped; at most
 * the first 12 groups are listed in the prompt, each limited to 4 paths,
 * 3 roles and 2 samples. When groups are left out, a trailing line states
 * how many were omitted.
 *
 * @param {object[]} exclusions - Exclusion entries (raw or already normalized).
 * @returns {{rawCount: number, uniqueCount: number, groupCount: number, groups: object[], prompt: string}}
 *   Counts, the listed groups and the prompt text ('' when there are no exclusions).
 */
function buildExclusionContext(exclusions) {
  if (exclusions.length === 0) {
    return {
      rawCount: 0,
      uniqueCount: 0,
      // Always present so callers that log or compare groupCount never see `undefined`.
      groupCount: 0,
      groups: [],
      prompt: '',
    };
  }
  const normalized = exclusions.map((exclusion, index) => normalizeExclusionEntry(exclusion, index));
  const unique = dedupeExclusions(normalized);
  const groups = groupExclusionsForAI(unique);
  // Cap what goes into the prompt to keep token usage bounded.
  const topGroups = groups.slice(0, 12).map(group => ({
    text: group.text,
    count: group.count,
    paths: group.paths.slice(0, 4),
    roles: group.roles.slice(0, 3),
    samples: group.samples.slice(0, 2),
  }));
  const omitted = groups.length - topGroups.length;
  const promptLines = [
    `已知誤報清單(原始 ${exclusions.length} 筆,整理後 ${unique.length} 筆,分成 ${groups.length} 類):`,
    ...topGroups.map((group, index) => {
      const parts = [
        `${index + 1}. ${group.text}`,
        `count=${group.count}`,
      ];
      if (group.paths.length > 0) parts.push(`paths=${group.paths.join(', ')}`);
      if (group.roles.length > 0) parts.push(`roles=${group.roles.join(', ')}`);
      if (group.samples.length > 0) parts.push(`samples=${group.samples.join(' | ')}`);
      return `- ${parts.join(' ; ')}`;
    }),
  ];
  if (omitted > 0) {
    promptLines.push(`- 另有 ${omitted} 類相似排除條目未展開,請依上述群組規則推論。`);
  }
  return {
    rawCount: exclusions.length,
    uniqueCount: unique.length,
    groupCount: groups.length,
    groups: topGroups,
    prompt: promptLines.join('\n'),
  };
}
/**
 * Load previously recorded findings from FINDINGS_PATH inside the workspace.
 *
 * Every loaded finding is tagged with `is_new: false`. A missing file is
 * reported with a single warning and yields an empty array.
 *
 * @param {string} workspace - Directory that FINDINGS_PATH is resolved against.
 * @returns {object[]} Old findings, or [] when the file is missing or unusable.
 */
export function loadOldFindings(workspace) {
  const fullPath = path.join(workspace, FINDINGS_PATH);

  // Handle the missing file here so it is reported once; readJSONArray would
  // emit its own "missing" warning as well.
  if (!fs.existsSync(fullPath)) {
    warn(`舊 findings 檔案不存在: ${fullPath}`);
    ok(`讀取舊 findings: 0`);
    return [];
  }

  line(`讀取舊 findings 檔案: ${fullPath}`);
  try {
    const stat = fs.statSync(fullPath);
    line(`舊 findings 檔案資訊: bytes=${stat.size} mtime=${formatFileTime(stat.mtimeMs)} path=${path.relative(workspace, fullPath) || fullPath}`);
  } catch (e) {
    // File metadata is only informational; a failure here must not abort loading.
    warn(`無法取得舊 findings 檔案資訊: ${e.message}`);
  }

  const old = readJSONArray(fullPath, '舊 findings ').map(f => ({ ...f, is_new: false }));
  ok(`讀取舊 findings: ${old.length}`);
  return old;
}
/**
 * Merge old and new findings, dropping new findings that duplicate an old one
 * or an earlier new one.
 *
 * Two findings count as duplicates when their role, location and the first
 * 50 characters of their suggestion are equal. Old findings are kept as-is
 * and come first in the result.
 *
 * @param {object[]} oldFindings - Findings from a previous run.
 * @param {object[]} newFindings - Findings from the current run.
 * @returns {object[]} Old findings followed by the non-duplicate new findings.
 */
export function mergeFindings(oldFindings, newFindings) {
  const signatureOf = finding => {
    const suggestionPrefix = String(finding.suggestion).slice(0, 50);
    return `${finding.role}|${finding.location}|${suggestionPrefix}`;
  };

  const known = new Set();
  for (const finding of oldFindings) {
    known.add(signatureOf(finding));
  }

  const deduped = [];
  for (const candidate of newFindings) {
    const signature = signatureOf(candidate);
    if (known.has(signature)) {
      continue;
    }
    known.add(signature);
    deduped.push(candidate);
  }

  const merged = [...oldFindings, ...deduped];
  ok(`合併結果: 舊=${oldFindings.length} 新(去重後)=${deduped.length} 總計=${merged.length}`);
  return merged;
}
/**
 * Return a copy of the findings sorted by severity (critical > warning > info).
 *
 * Findings whose `level` is missing or not listed in LEVELS are placed after
 * all known levels instead of ahead of `critical`. The input array is not
 * modified.
 *
 * @param {object[]} findings - Findings to sort.
 * @returns {object[]} New array ordered from most to least severe.
 */
export function sortByLevel(findings) {
  const rankOf = finding => {
    const index = LEVELS.indexOf(finding.level);
    // indexOf returns -1 for unknown levels, which would otherwise sort them first.
    return index === -1 ? LEVELS.length : index;
  };
  return [...findings].sort((a, b) => rankOf(a) - rankOf(b));
}
/**
 * Shared degradation path for failed AI calls: log a warning and keep every finding.
 *
 * HTTP status 402 or 429 on `e.response` is reported as a quota / rate-limit
 * failure; any other error is reported with its message.
 *
 * @param {string} label - Name of the AI step that failed, used in the warning.
 * @param {object[]} findings - Findings to return unchanged.
 * @param {Error & {response?: {status?: number}}} e - The error that was caught.
 * @returns {object[]} The same `findings` array that was passed in.
 */
function fallback(label, findings, e) {
  const status = e.response?.status;
  const isQuotaOrRateLimit = status === 402 || status === 429;

  let reason;
  if (isQuotaOrRateLimit) {
    reason = `${status} 額度/限流`;
  } else {
    reason = e.message;
  }

  warn(`${label}失敗(${reason}),降級:保留所有問題`);
  return findings;
}
/**
 * Reduce findings to the fields sent to the AI (level, role, location,
 * suggestion) to cut down token usage.
 *
 * @param {object[]} findings - Full finding objects.
 * @returns {{level: *, role: *, location: *, suggestion: *}[]} Slimmed copies.
 */
function toAIPayload(findings) {
  return findings.map(finding => ({
    level: finding.level,
    role: finding.role,
    location: finding.location,
    suggestion: finding.suggestion,
  }));
}
/**
 * Ask the LLM to remove semantically duplicated findings.
 *
 * Only the slimmed payload is sent to the LLM; each returned entry is mapped
 * back to the original finding so fields such as `is_new` are preserved. The
 * lookup first matches on role + location + the first 50 characters of the
 * suggestion, then falls back to location + suggestion prefix, and finally
 * keeps the entry as returned when no original matches.
 *
 * Any failure, including an empty or non-array reply, degrades to returning
 * the input findings unchanged.
 *
 * @param {object[]} findings - Findings to de-duplicate.
 * @returns {Promise<object[]>} De-duplicated findings, or the input on failure.
 */
export async function deduplicateWithAI(findings) {
  if (findings.length === 0) return findings;
  const systemPrompt = `移除語意重複的程式碼審查問題(JSON 陣列)。保留等級較高者(critical > warning > info)。只回傳去重後的 JSON 陣列。`;
  try {
    const result = await chatJSON(systemPrompt, JSON.stringify(toAIPayload(findings)));
    if (Array.isArray(result) && result.length > 0) {
      ok(`AI 去重: ${findings.length} -> ${result.length}`);
      const looseKey = f => `${f.location}|${String(f.suggestion).slice(0, 50)}`;
      const strictKey = f => `${f.role}|${looseKey(f)}`;
      // Index by both keys: the role-aware key keeps findings from different roles
      // apart, the loose key still matches when the reply omits or alters the role.
      const byStrictKey = new Map();
      const byLooseKey = new Map();
      for (const f of findings) {
        byStrictKey.set(strictKey(f), f);
        byLooseKey.set(looseKey(f), f);
      }
      return result.map(r => byStrictKey.get(strictKey(r)) ?? byLooseKey.get(looseKey(r)) ?? r);
    }
    throw new Error('AI 回傳空陣列或非陣列');
  } catch (e) {
    return fallback('AI 去重', findings, e);
  }
}
/**
 * Load and normalize the exclusion list from EXCLUSIONS_PATH inside the workspace.
 *
 * The file may contain a bare array or an object with an `excluded_findings`
 * array. Entries are normalized and de-duplicated by fingerprint. A missing,
 * unreadable or unparsable file yields an empty array.
 *
 * @param {string} workspace - Directory that EXCLUSIONS_PATH is resolved against.
 * @param {{branch?: string, shortSha?: string, headSha?: string, commitTime?: string}|null} [repoState=null]
 *   Optional source-branch information; only used for logging.
 * @returns {object[]} Normalized, de-duplicated exclusion records.
 */
export function loadExclusions(workspace, repoState = null) {
  const fullPath = path.join(workspace, EXCLUSIONS_PATH);
  if (!fs.existsSync(fullPath)) {
    warn(`排除問題檔案不存在,視為空: ${fullPath}`);
    if (repoState) {
      const branch = repoState.branch || 'detached';
      const shortSha = repoState.shortSha || repoState.headSha || 'unknown';
      line(`來源分支狀態: branch=${branch} commit=${shortSha} commit_time=${repoState.commitTime || 'unknown'}`);
    }
    ok('讀取排除問題: raw=0 normalized=0 筆');
    return [];
  }
  let exclusions = [];
  let rawCount = 0;
  try {
    const stat = fs.statSync(fullPath);
    const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
    // The raw count is the number of entries before normalization and de-duplication.
    const rawEntries = normalizeExclusions(data);
    rawCount = rawEntries.length;
    exclusions = dedupeExclusions(rawEntries.map((exclusion, index) => normalizeExclusionEntry(exclusion, index)));
    const branch = repoState?.branch || 'detached';
    const shortSha = repoState?.shortSha || repoState?.headSha || 'unknown';
    const commitTime = repoState?.commitTime || 'unknown';
    line(`讀取排除問題檔案: ${fullPath}`);
    line(`來源分支狀態: branch=${branch} commit=${shortSha} commit_time=${commitTime}`);
    line(`檔案資訊: bytes=${stat.size} mtime=${formatFileTime(stat.mtimeMs)} raw=${rawCount} normalized=${exclusions.length} path=${path.relative(workspace, fullPath) || fullPath}`);
  } catch (e) {
    warn(`讀取排除問題失敗: ${e.message},視為空: ${fullPath}`);
    exclusions = [];
  }
  const summary = buildExclusionContext(exclusions);
  // The summary for an empty list may not carry a group count; log 0 rather than "undefined".
  const groupCount = summary.groupCount ?? 0;
  ok(`讀取排除問題: raw=${rawCount} normalized=${exclusions.length} groups=${groupCount}`);
  return exclusions;
}
/**
 * Apply exclusion rules and drop the findings they match.
 *
 * Matching rules, per exclusion:
 * - Path: only the file part of `location` (before the first ':') is compared;
 *   line numbers are ignored. An exclusion without a path matches any file.
 * - Role: compared for equality; an exclusion without a role matches any role.
 * - Text: only consulted when the exclusion has neither a path nor a role.
 *   Both sides are normalized with `normalizeText` and match when one contains
 *   the other. A finding without text never matches a text-only exclusion.
 * - An exclusion with no path, no role and no text is ignored, so a blank or
 *   malformed entry cannot exclude every finding.
 *
 * @param {object[]} findings - Findings to filter.
 * @param {object[]} exclusions - Exclusion entries (normalized or raw).
 * @returns {object[]} Findings not matched by any exclusion; the input array
 *   itself when `exclusions` is empty.
 */
export function applyExclusions(findings, exclusions) {
  if (exclusions.length === 0) return findings;

  // Pre-compute each rule once instead of once per finding, and normalize the
  // exclusion text with the same function used for the finding text.
  const rules = exclusions
    .map(ex => ({
      path: ex?.filePath || (ex?.location ? String(ex.location).split(':')[0] : ''),
      role: ex?.role || null,
      text: normalizeText(ex?.text || ex?.suggestion || ex?.title || ''),
    }))
    .filter(rule => rule.path || rule.role || rule.text);

  const before = findings.length;
  const filtered = findings.filter(f => {
    const fPath = String(f.location).split(':')[0];
    const findingText = normalizeText(f.suggestion || f.title || '');
    const excluded = rules.some(rule => {
      if (rule.path && rule.path !== fPath) return false;
      if (rule.role && rule.role !== f.role) return false;
      // A path or role on the exclusion is sufficient; text is not compared.
      if (rule.path || rule.role) return true;
      return findingText !== ''
        && (findingText.includes(rule.text) || rule.text.includes(findingText));
    });
    return !excluded;
  });
  ok(`排除過濾: ${before} -> ${filtered.length} 筆(排除 ${before - filtered.length} 筆)`);
  return filtered;
}
/**
 * Ask the LLM which findings are false positives or not applicable and keep the rest.
 *
 * Known exclusions are summarized into the system prompt as a hint. Each
 * returned entry is mapped back to the original finding so fields such as
 * `is_new` are preserved: first by role + location + the first 50 characters
 * of the suggestion, then by location + suggestion prefix, and otherwise the
 * entry is kept as returned.
 *
 * Any failure degrades to returning the input findings unchanged. An empty
 * reply is treated as a failure too, so the AI step alone never removes every
 * finding.
 *
 * @param {object[]} findings - Findings to filter.
 * @param {object[]} [exclusions=[]] - Known exclusions used to build the prompt hint.
 * @param {Function} [chatFn=chatJSON] - Called as `chatFn(systemPrompt, userContent)`;
 *   expected to resolve to an array of findings.
 * @returns {Promise<object[]>} Findings to keep, or the input on failure.
 */
export async function filterFalsePositivesWithAI(findings, exclusions = [], chatFn = chatJSON) {
  if (findings.length === 0) return findings;
  const exclusionContext = buildExclusionContext(exclusions);
  const exclusionHint = exclusionContext.prompt
    ? `\n${exclusionContext.prompt}\n規則:若 finding 與上述任何一類的路徑、角色或描述高度相似,優先視為誤報或不適用。`
    : '';
  const systemPrompt = `判斷以下程式碼審查問題是否為誤報或不適用(如已正確使用 secrets、CI/CD 必要權限等),移除後只回傳需保留的 JSON 陣列。${exclusionHint}`;
  try {
    const result = await chatFn(systemPrompt, JSON.stringify(toAIPayload(findings)));
    if (Array.isArray(result) && result.length > 0) {
      ok(`AI 誤報過濾: ${findings.length} -> ${result.length}`);
      const looseKey = f => `${f.location}|${String(f.suggestion).slice(0, 50)}`;
      const strictKey = f => `${f.role}|${looseKey(f)}`;
      // Index by both keys: the role-aware key keeps findings from different roles
      // apart, the loose key still matches when the reply omits or alters the role.
      const byStrictKey = new Map();
      const byLooseKey = new Map();
      for (const f of findings) {
        byStrictKey.set(strictKey(f), f);
        byLooseKey.set(looseKey(f), f);
      }
      return result.map(r => byStrictKey.get(strictKey(r)) ?? byLooseKey.get(looseKey(r)) ?? r);
    }
    throw new Error('AI 回傳空陣列或非陣列');
  } catch (e) {
    return fallback('AI 誤報過濾', findings, e);
  }
}