// Listing metadata: 344 lines, 12 KiB, JavaScript.
import fs from 'fs';
import path from 'path';

import { chatJSON } from './llm.js';
import { FINDINGS_PATH, EXCLUSIONS_PATH } from './config.js';
import { line, ok, warn } from './log.js';

/** Severity levels in descending order of severity; the array index is used as the sort rank. */
const LEVELS = Object.freeze(['critical', 'warning', 'info']);

/**
 * Analyze a diff with a single reviewer role and return its findings.
 *
 * The role's system prompt and the diff are sent to the LLM. Only entries that
 * carry `level`, `role`, `location` and `suggestion` are kept, and each kept
 * entry is tagged with `is_new: true`.
 *
 * @param {{name: string, system_prompt: string}} role - Reviewer role definition.
 * @param {string} diff - Git diff text to analyze.
 * @returns {Promise<object[]>} The valid findings, each with `is_new: true`.
 */
export async function analyzeWithRole(role, diff) {
  line(`[${role.name}] 開始分析`);
  const response = await chatJSON(role.system_prompt, `以下是 Git Diff 內容:\n\n${diff}`);

  // The model may answer with something other than an array; treat that as
  // "no findings" instead of crashing on `.filter`.
  let findings = response;
  if (!Array.isArray(findings)) {
    warn(`[${role.name}] LLM 回傳內容不是陣列,視為沒有問題`);
    findings = [];
  }

  const valid = findings
    // `f &&` guards against null / undefined entries inside the array.
    .filter(f => f && f.level && f.role && f.location && f.suggestion)
    .map(f => ({ ...f, is_new: true }));
  ok(`[${role.name}] 找到 ${valid.length} 個問題`);
  return valid;
}

/**
 * Read a JSON file that is expected to contain an array.
 *
 * Never throws: a missing file, a read/parse error, or valid JSON that is not
 * an array all produce a warning and an empty array.
 *
 * @param {string} fullPath - Path of the JSON file to read.
 * @param {string} label - Label interpolated into the warning messages.
 * @returns {Array} The parsed array, or `[]` on any failure.
 */
function readJSONArray(fullPath, label) {
  if (!fs.existsSync(fullPath)) {
    warn(`${label}檔案不存在,視為空`);
    return [];
  }
  try {
    const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
    if (Array.isArray(data)) return data;
    // Valid JSON of the wrong shape used to be dropped silently.
    warn(`${label}內容不是陣列,視為空`);
    return [];
  } catch (e) {
    warn(`讀取${label}失敗: ${e.message},視為空`);
    return [];
  }
}

/**
 * Extract the exclusion list from parsed exclusion-file content.
 *
 * Two shapes are accepted: a bare array, or an object whose
 * `excluded_findings` property is an array. Anything else yields `[]`.
 *
 * @param {*} data - Parsed JSON content.
 * @returns {Array} The exclusion entries.
 */
function normalizeExclusions(data) {
  if (Array.isArray(data)) return data;
  if (data && Array.isArray(data.excluded_findings)) return data.excluded_findings;
  return [];
}

/**
 * Format a file modification time as an ISO-8601 string.
 *
 * @param {number} mtimeMs - Modification time in milliseconds since the epoch.
 * @returns {string} ISO timestamp, or `'unknown'` when the value is not a
 *   finite number or lies outside the range a Date can represent.
 */
function formatFileTime(mtimeMs) {
  if (!Number.isFinite(mtimeMs)) return 'unknown';
  const date = new Date(mtimeMs);
  // A finite but out-of-range value produces an invalid Date, and calling
  // toISOString() on an invalid Date throws a RangeError.
  return Number.isNaN(date.getTime()) ? 'unknown' : date.toISOString();
}

/**
 * Return the trimmed string, or `''` when the value is not a string.
 *
 * @param {*} value - Any value.
 * @returns {string} Trimmed text, never null/undefined.
 */
function cleanText(value) {
  return typeof value === 'string' ? value.trim() : '';
}

/**
 * Normalize text for loose comparison: NFKC-normalize, lower-case, and replace
 * every run of punctuation, symbols and whitespace with a single space.
 *
 * @param {*} value - Any value; non-strings normalize to `''`.
 * @returns {string} Lower-cased, space-separated text without leading or
 *   trailing spaces.
 */
function normalizeText(value) {
  return cleanText(value)
    .normalize('NFKC')
    .toLowerCase()
    // Each maximal run collapses to exactly one space, so no second
    // whitespace-collapsing pass is needed afterwards.
    .replace(/[\p{P}\p{S}\s]+/gu, ' ')
    .trim();
}

/**
 * Build a compact key from text: NFKC-normalize and remove all punctuation,
 * symbols and whitespace. Letter case is preserved.
 *
 * @param {*} value - Any value; non-strings produce `''`.
 * @returns {string} Key text without punctuation, symbols or whitespace.
 */
function toKeyText(value) {
  // All whitespace is removed by the replace, so no trailing trim is required.
  return cleanText(value)
    .normalize('NFKC')
    .replace(/[\p{P}\p{S}\s]+/gu, '');
}

/**
 * Pick the descriptive text of an exclusion entry.
 *
 * The first non-empty (after trimming) string among `original_finding`,
 * `title`, `suggestion`, `reason` and `note` is returned, in that priority.
 *
 * @param {object|null|undefined} exclusion - Exclusion entry.
 * @returns {string} The chosen text, or `''` when none of the fields is usable.
 */
function getExclusionText(exclusion) {
  return cleanText(exclusion?.original_finding)
    || cleanText(exclusion?.title)
    || cleanText(exclusion?.suggestion)
    || cleanText(exclusion?.reason)
    || cleanText(exclusion?.note);
}

/**
 * Normalize one exclusion entry and attach derived matching fields.
 *
 * The returned object keeps the entry's own properties and adds/overrides:
 * - `location`: trimmed location string, or `null` when empty
 * - `filePath`: the part of `location` before the first `:` (`''` when empty)
 * - `role`: trimmed role, or `null` when empty
 * - `text` / `textKey`: descriptive text and its compact key form
 * - `fingerprint`: `filePath|role|textKey`, with `*` for a missing path or role
 *   and `entry-<index + 1>` when there is no text
 *
 * @param {object|null|undefined} exclusion - Raw exclusion entry.
 * @param {number} index - Zero-based position of the entry in its list.
 * @returns {object} The normalized entry.
 */
function normalizeExclusionEntry(exclusion, index) {
  const location = cleanText(exclusion?.location);
  const filePath = location ? location.split(':')[0] : '';
  const role = cleanText(exclusion?.role);
  const text = getExclusionText(exclusion);
  const textKey = toKeyText(text);
  const fingerprint = [filePath || '*', role || '*', textKey || `entry-${index + 1}`].join('|');
  // Only spread real objects: spreading a string would copy its characters in
  // as numeric keys.
  const base = exclusion && typeof exclusion === 'object' ? exclusion : {};
  return {
    ...base,
    location: location || null,
    filePath,
    role: role || null,
    text,
    textKey,
    fingerprint,
  };
}

/**
 * Remove entries whose `fingerprint` was already seen, keeping the first
 * occurrence and the original order.
 *
 * @param {Array<{fingerprint: string}>} exclusions - Normalized entries.
 * @returns {Array} A new array without fingerprint duplicates.
 */
function dedupeExclusions(exclusions) {
  const seen = new Set();
  return exclusions.filter(exclusion => {
    if (seen.has(exclusion.fingerprint)) return false;
    seen.add(exclusion.fingerprint);
    return true;
  });
}

/**
 * Group normalized exclusions that share the same text key so they can be
 * summarized compactly in an LLM prompt.
 *
 * Entries are grouped by `textKey` (falling back to `fingerprint`). Each group
 * records how many entries it contains, the distinct file paths and roles, and
 * up to two distinct sample texts. Groups are ordered by entry count
 * (descending), then number of distinct paths (descending), then text.
 *
 * @param {Array<object>} exclusions - Entries produced by normalizeExclusionEntry.
 * @returns {Array<{text: string, count: number, paths: string[], roles: string[], samples: string[]}>}
 */
function groupExclusionsForAI(exclusions) {
  const groups = new Map();
  for (const exclusion of exclusions) {
    const groupKey = exclusion.textKey || exclusion.fingerprint;
    let group = groups.get(groupKey);
    if (!group) {
      group = {
        // Always a string so the localeCompare tie-breaker below cannot throw.
        text: String(exclusion.text || exclusion.location || exclusion.fingerprint || ''),
        count: 0,
        paths: new Set(),
        roles: new Set(),
        samples: [],
      };
      groups.set(groupKey, group);
    }
    group.count += 1;
    if (exclusion.filePath) group.paths.add(exclusion.filePath);
    if (exclusion.role) group.roles.add(exclusion.role);
    // Keep at most two samples and skip identical ones, which would only
    // repeat the same text in the prompt.
    if (exclusion.text && group.samples.length < 2 && !group.samples.includes(exclusion.text)) {
      group.samples.push(exclusion.text);
    }
  }

  return [...groups.values()]
    .sort((a, b) => b.count - a.count || b.paths.size - a.paths.size || a.text.localeCompare(b.text))
    .map(group => ({
      text: group.text,
      count: group.count,
      paths: [...group.paths].sort(),
      roles: [...group.roles].sort(),
      samples: group.samples,
    }));
}

/**
 * Summarize exclusion entries into a compact context for the LLM.
 *
 * Entries are normalized, de-duplicated by fingerprint and grouped by text.
 * At most 12 groups are expanded in the prompt, each limited to 4 paths,
 * 3 roles and 2 samples; the number of omitted groups is mentioned in a
 * trailing line.
 *
 * @param {Array<object>} exclusions - Raw or already normalized exclusion entries.
 * @returns {{rawCount: number, uniqueCount: number, groupCount: number, groups: Array<object>, prompt: string}}
 *   Counts, the expanded groups, and the prompt text (`''` when there is
 *   nothing to report).
 */
function buildExclusionContext(exclusions) {
  // The empty result carries the same keys as the populated one so callers can
  // read `groupCount` unconditionally.
  if (!Array.isArray(exclusions) || exclusions.length === 0) {
    return {
      rawCount: 0,
      uniqueCount: 0,
      groupCount: 0,
      groups: [],
      prompt: '',
    };
  }

  const normalized = exclusions.map((exclusion, index) => normalizeExclusionEntry(exclusion, index));
  const unique = dedupeExclusions(normalized);
  const groups = groupExclusionsForAI(unique);
  // Cap what is sent to the model to keep the prompt small.
  const topGroups = groups.slice(0, 12).map(group => ({
    text: group.text,
    count: group.count,
    paths: group.paths.slice(0, 4),
    roles: group.roles.slice(0, 3),
    samples: group.samples.slice(0, 2),
  }));
  const omitted = groups.length - topGroups.length;

  const promptLines = [
    `已知誤報清單(原始 ${exclusions.length} 筆,整理後 ${unique.length} 筆,分成 ${groups.length} 類):`,
    ...topGroups.map((group, index) => {
      const parts = [
        `${index + 1}. ${group.text}`,
        `count=${group.count}`,
      ];
      if (group.paths.length > 0) parts.push(`paths=${group.paths.join(', ')}`);
      if (group.roles.length > 0) parts.push(`roles=${group.roles.join(', ')}`);
      if (group.samples.length > 0) parts.push(`samples=${group.samples.join(' | ')}`);
      return `- ${parts.join(' ; ')}`;
    }),
  ];
  if (omitted > 0) {
    promptLines.push(`- 另有 ${omitted} 類相似排除條目未展開,請依上述群組規則推論。`);
  }

  return {
    rawCount: exclusions.length,
    uniqueCount: unique.length,
    groupCount: groups.length,
    groups: topGroups,
    prompt: promptLines.join('\n'),
  };
}

/**
 * Load previously stored findings from `FINDINGS_PATH` inside the workspace.
 *
 * Every loaded entry is tagged with `is_new: false`. A missing or unreadable
 * file results in an empty array.
 *
 * @param {string} workspace - Directory that FINDINGS_PATH is resolved against.
 * @returns {object[]} Old findings, each with `is_new: false`.
 */
export function loadOldFindings(workspace) {
  const fullPath = path.join(workspace, FINDINGS_PATH);

  // Check existence once here so a missing file is reported a single time.
  if (!fs.existsSync(fullPath)) {
    warn(`舊 findings 檔案不存在: ${fullPath}`);
    ok(`讀取舊 findings: ${0} 筆`);
    return [];
  }

  line(`讀取舊 findings 檔案: ${fullPath}`);
  try {
    const stat = fs.statSync(fullPath);
    line(`舊 findings 檔案資訊: bytes=${stat.size} mtime=${formatFileTime(stat.mtimeMs)} path=${path.relative(workspace, fullPath) || fullPath}`);
  } catch (e) {
    // File metadata is diagnostic only; failing to read it must not abort loading.
    warn(`無法取得舊 findings 檔案資訊: ${e.message}`);
  }

  const old = readJSONArray(fullPath, '舊 findings ')
    // Skip non-object entries; spreading a primitive would yield a bogus finding.
    .filter(f => f && typeof f === 'object')
    .map(f => ({ ...f, is_new: false }));
  ok(`讀取舊 findings: ${old.length} 筆`);
  return old;
}

/**
 * Merge old and new findings, dropping new findings that duplicate an old one
 * or an earlier new one.
 *
 * Two findings are considered the same when their `role`, `location` and the
 * first 50 characters of `suggestion` are equal. All old findings are kept
 * as-is and come first in the result.
 *
 * @param {object[]} oldFindings - Findings loaded from a previous run.
 * @param {object[]} newFindings - Findings produced by the current run.
 * @returns {object[]} Old findings followed by the de-duplicated new findings.
 */
export function mergeFindings(oldFindings, newFindings) {
  const keyOf = f => `${f.role}|${f.location}|${String(f.suggestion).slice(0, 50)}`;
  const seen = new Set(oldFindings.map(keyOf));
  const deduped = newFindings.filter(f => {
    const k = keyOf(f);
    if (seen.has(k)) return false;
    seen.add(k);
    return true;
  });
  const merged = [...oldFindings, ...deduped];
  ok(`合併結果: 舊=${oldFindings.length} 新(去重後)=${deduped.length} 總計=${merged.length}`);
  return merged;
}

/**
 * Return a copy of the findings sorted by severity
 * (critical > warning > info).
 *
 * Findings whose level is not listed in LEVELS are placed after all known
 * levels. The input array is not modified.
 *
 * @param {object[]} findings - Findings with a `level` property.
 * @returns {object[]} New array sorted by severity.
 */
export function sortByLevel(findings) {
  const rankOf = f => {
    const index = LEVELS.indexOf(f?.level);
    // indexOf returns -1 for unknown levels, which would sort them ahead of
    // 'critical'; rank them last instead.
    return index === -1 ? LEVELS.length : index;
  };
  return [...findings].sort((a, b) => rankOf(a) - rankOf(b));
}

/**
 * Shared degradation path for failed AI calls: log a warning and keep every
 * finding.
 *
 * HTTP status 402 and 429 on `e.response` are reported as quota / rate-limit
 * failures; otherwise the error message is used.
 *
 * @param {string} label - Name of the failed step, used in the warning.
 * @param {object[]} findings - Findings to return unchanged.
 * @param {*} e - The caught error (may be a non-Error value).
 * @returns {object[]} The `findings` argument, untouched.
 */
function fallback(label, findings, e) {
  const status = e?.response?.status;
  const reason = (status === 402 || status === 429)
    ? `${status} 額度/限流`
    // Anything can be thrown in JavaScript; fall back to its string form.
    : (e?.message ?? String(e));
  warn(`${label}失敗(${reason}),降級:保留所有問題`);
  return findings;
}

/**
 * Reduce findings to the fields the AI needs, to cut token usage.
 *
 * @param {object[]} findings - Full finding objects.
 * @returns {Array<{level: *, role: *, location: *, suggestion: *}>} Slim copies.
 */
function toAIPayload(findings) {
  return findings.map(({ level, role, location, suggestion }) => ({ level, role, location, suggestion }));
}

/**
 * Ask the LLM to remove semantically duplicated findings.
 *
 * Only the slim payload (level, role, location, suggestion) is sent. Each
 * returned item is mapped back to the original finding object so fields such
 * as `is_new` survive. On any failure, or when the reply is not a non-empty
 * array, the original findings are returned unchanged.
 *
 * @param {object[]} findings - Findings to de-duplicate.
 * @param {Function} [chatFn=chatJSON] - `(systemPrompt, userContent) => Promise<any>`;
 *   injectable for testing.
 * @returns {Promise<object[]>} De-duplicated findings, or the input on failure.
 */
export async function deduplicateWithAI(findings, chatFn = chatJSON) {
  if (findings.length === 0) return findings;

  const systemPrompt = `移除語意重複的程式碼審查問題(JSON 陣列)。保留等級較高者(critical > warning > info)。只回傳去重後的 JSON 陣列。`;

  try {
    const result = await chatFn(systemPrompt, JSON.stringify(toAIPayload(findings)));
    if (Array.isArray(result) && result.length > 0) {
      ok(`AI 去重: ${findings.length} -> ${result.length} 筆`);
      // Restore the full original objects (including is_new). The exact key
      // includes the role so findings from different roles with the same
      // location and text are not confused; the looser key is a fallback for
      // replies where the role was altered or dropped.
      const looseKey = f => `${f.location}|${String(f.suggestion).slice(0, 50)}`;
      const exactKey = f => `${f.role}|${looseKey(f)}`;
      const byExact = new Map();
      const byLoose = new Map();
      for (const f of findings) {
        byExact.set(exactKey(f), f);
        byLoose.set(looseKey(f), f);
      }
      return result.map(r => byExact.get(exactKey(r)) ?? byLoose.get(looseKey(r)) ?? r);
    }
    throw new Error('AI 回傳空陣列或非陣列');
  } catch (e) {
    return fallback('AI 去重', findings, e);
  }
}

/**
 * Load the exclusion list from `EXCLUSIONS_PATH` inside the workspace.
 *
 * The file may hold a bare array or an object with an `excluded_findings`
 * array. Entries are normalized and de-duplicated by fingerprint. A missing,
 * unreadable or malformed file yields an empty list. Diagnostic information
 * about the file and, when provided, the repository state is logged.
 *
 * @param {string} workspace - Directory that EXCLUSIONS_PATH is resolved against.
 * @param {{branch?: string, shortSha?: string, headSha?: string, commitTime?: string}|null} [repoState=null]
 *   Optional repository state, used for logging only.
 * @returns {object[]} Normalized, de-duplicated exclusion entries.
 */
export function loadExclusions(workspace, repoState = null) {
  const fullPath = path.join(workspace, EXCLUSIONS_PATH);

  const branch = repoState?.branch || 'detached';
  const shortSha = repoState?.shortSha || repoState?.headSha || 'unknown';
  const commitTime = repoState?.commitTime || 'unknown';
  const repoStateLine = `來源分支狀態: branch=${branch} commit=${shortSha} commit_time=${commitTime}`;

  if (!fs.existsSync(fullPath)) {
    warn(`排除問題檔案不存在,視為空: ${fullPath}`);
    // In the missing-file case the repo state is logged only when it was supplied.
    if (repoState) line(repoStateLine);
    ok('讀取排除問題: raw=0 normalized=0 筆');
    return [];
  }

  let exclusions = [];
  let rawCount = 0;
  try {
    const stat = fs.statSync(fullPath);
    const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
    const rawEntries = normalizeExclusions(data);
    rawCount = rawEntries.length;
    exclusions = dedupeExclusions(rawEntries.map((exclusion, index) => normalizeExclusionEntry(exclusion, index)));
    line(`讀取排除問題檔案: ${fullPath}`);
    line(repoStateLine);
    line(`檔案資訊: bytes=${stat.size} mtime=${formatFileTime(stat.mtimeMs)} raw=${rawCount} normalized=${exclusions.length} path=${path.relative(workspace, fullPath) || fullPath}`);
  } catch (e) {
    warn(`讀取排除問題失敗: ${e.message},視為空: ${fullPath}`);
    exclusions = [];
  }

  const summary = buildExclusionContext(exclusions);
  // `?? 0` keeps the log readable if the summary carries no group count.
  ok(`讀取排除問題: raw=${rawCount} normalized=${exclusions.length} groups=${summary.groupCount ?? 0} 筆`);
  return exclusions;
}

/**
 * Apply exclusion rules and drop the findings they match.
 *
 * Matching rules:
 * - `location` is compared by file path only (the part before the first `:`),
 *   so line numbers are ignored.
 * - A rule that names a path and/or a role matches every finding with that
 *   path/role; its text is not consulted.
 * - A rule with neither path nor role matches by text: both sides are
 *   normalized the same way and one has to contain the other. A finding with
 *   no usable text matches any such rule.
 * - A rule with no path, no role and no text is ignored, so a blank entry
 *   cannot wipe out every finding.
 *
 * @param {object[]} findings - Findings to filter.
 * @param {object[]} exclusions - Raw or normalized exclusion entries.
 * @returns {object[]} The findings that are not excluded (the same array
 *   instance when `exclusions` is empty).
 */
export function applyExclusions(findings, exclusions) {
  if (exclusions.length === 0) return findings;

  // Pre-compute each rule once instead of once per finding. The text is always
  // produced by normalizeText so it is comparable with the finding text; the
  // entry's `textKey` has a different normalization and is not usable here.
  const rules = exclusions
    .map(ex => ({
      path: ex?.filePath || (ex?.location ? String(ex.location).split(':')[0] : null),
      role: ex?.role || null,
      text: normalizeText(ex?.text || ex?.suggestion || ex?.title || ''),
    }))
    .filter(rule => rule.path || rule.role || rule.text);

  const before = findings.length;
  const filtered = findings.filter(f => {
    const fPath = String(f.location).split(':')[0];
    const findingText = normalizeText(f.suggestion || f.title || '');
    const excluded = rules.some(rule => {
      if (rule.path && rule.path !== fPath) return false;
      if (rule.role && rule.role !== f.role) return false;
      if (rule.path || rule.role) return true;
      return !findingText || findingText.includes(rule.text) || rule.text.includes(findingText);
    });
    return !excluded;
  });
  ok(`排除過濾: ${before} -> ${filtered.length} 筆(排除 ${before - filtered.length} 筆)`);
  return filtered;
}

/**
 * Ask the LLM which findings are false positives or not applicable and keep
 * only the ones it returns.
 *
 * Known exclusions are summarized into the system prompt as a hint. Each
 * returned item is mapped back to the original finding object so fields such
 * as `is_new` survive. On any failure, or when the reply is not a non-empty
 * array, the original findings are returned unchanged.
 *
 * @param {object[]} findings - Findings to review.
 * @param {object[]} [exclusions=[]] - Known exclusion entries used as a hint.
 * @param {Function} [chatFn=chatJSON] - `(systemPrompt, userContent) => Promise<any>`;
 *   injectable for testing.
 * @returns {Promise<object[]>} Findings to keep, or the input on failure.
 */
export async function filterFalsePositivesWithAI(findings, exclusions = [], chatFn = chatJSON) {
  if (findings.length === 0) return findings;

  const exclusionContext = buildExclusionContext(exclusions);
  const exclusionHint = exclusionContext.prompt
    ? `\n${exclusionContext.prompt}\n規則:若 finding 與上述任何一類的路徑、角色或描述高度相似,優先視為誤報或不適用。`
    : '';

  const systemPrompt = `判斷以下程式碼審查問題是否為誤報或不適用(如已正確使用 secrets、CI/CD 必要權限等),移除後只回傳需保留的 JSON 陣列。${exclusionHint}`;

  try {
    const result = await chatFn(systemPrompt, JSON.stringify(toAIPayload(findings)));
    if (Array.isArray(result) && result.length > 0) {
      ok(`AI 誤報過濾: ${findings.length} -> ${result.length} 筆`);
      // Restore the full original objects. The exact key includes the role so
      // findings from different roles with the same location and text are not
      // confused; the looser key is a fallback for replies where the role was
      // altered or dropped.
      const looseKey = f => `${f.location}|${String(f.suggestion).slice(0, 50)}`;
      const exactKey = f => `${f.role}|${looseKey(f)}`;
      const byExact = new Map();
      const byLoose = new Map();
      for (const f of findings) {
        byExact.set(exactKey(f), f);
        byLoose.set(looseKey(f), f);
      }
      return result.map(r => byExact.get(exactKey(r)) ?? byLoose.get(looseKey(r)) ?? r);
    }
    throw new Error('AI 回傳空陣列或非陣列');
  } catch (e) {
    return fallback('AI 誤報過濾', findings, e);
  }
}