import fs from 'fs';
import path from 'path';
import { chatJSON } from './llm.js';
import { FINDINGS_PATH, EXCLUSIONS_PATH } from './config.js';
import { line, ok, warn } from './log.js';

/** Severity levels, ordered from most to least severe. */
const LEVELS = ['critical', 'warning', 'info'];

/**
 * Analyze a diff with a single reviewer role and return its findings.
 *
 * Entries missing any of `level`, `role`, `location` or `suggestion` are dropped;
 * the remaining ones are tagged with `is_new: true`.
 *
 * @param {{ name: string, system_prompt: string }} role - Reviewer role definition.
 * @param {string} diff - Git diff text embedded into the user message.
 * @returns {Promise<object[]>} The valid findings reported for this role.
 * @throws {TypeError} When `chatJSON` does not resolve to an array.
 */
export async function analyzeWithRole(role, diff) {
  line(`[${role.name}] 開始分析`);
  const findings = await chatJSON(role.system_prompt, `以下是 Git Diff 內容:\n\n${diff}`);
  if (!Array.isArray(findings)) {
    // Fail with an explicit message instead of "findings.filter is not a function".
    throw new TypeError(`[${role.name}] AI 回傳非陣列`);
  }
  const valid = findings
    // `f &&` skips null / non-object entries instead of crashing on property access.
    .filter(f => f && f.level && f.role && f.location && f.suggestion)
    .map(f => ({ ...f, is_new: true }));
  ok(`[${role.name}] 找到 ${valid.length} 個問題`);
  return valid;
}

/**
 * Read a JSON file that is expected to hold a top-level array.
 *
 * @param {string} fullPath - Absolute path of the file to read.
 * @param {string} label - Human-readable label used in warning messages.
 * @returns {Array} The parsed array, or `[]` when the file is missing,
 *   cannot be read/parsed, or does not contain a top-level array.
 */
function readJSONArray(fullPath, label) {
  if (!fs.existsSync(fullPath)) {
    warn(`${label}檔案不存在,視為空`);
    return [];
  }
  try {
    const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
    return Array.isArray(data) ? data : [];
  } catch (e) {
    warn(`讀取${label}失敗: ${e.message},視為空`);
    return [];
  }
}

/**
 * Extract the exclusion list from any of the supported file shapes:
 * a top-level array, `{ exclusions: [...] }` or `{ excluded_findings: [...] }`.
 *
 * @param {*} data - Parsed JSON content.
 * @returns {Array} The exclusion entries, or `[]` for an unrecognized shape.
 */
function normalizeExclusions(data) {
  if (Array.isArray(data)) return data;
  if (data && Array.isArray(data.exclusions)) return data.exclusions;
  if (data && Array.isArray(data.excluded_findings)) return data.excluded_findings;
  return [];
}

/**
 * Name the shape that `normalizeExclusions` would read the list from.
 *
 * @param {*} data - Parsed JSON content.
 * @returns {'array'|'exclusions'|'excluded_findings'|'unknown'} The detected shape.
 */
function detectExclusionSource(data) {
  if (Array.isArray(data)) return 'array';
  if (data && Array.isArray(data.exclusions)) return 'exclusions';
  if (data && Array.isArray(data.excluded_findings)) return 'excluded_findings';
  return 'unknown';
}

/**
 * Write the exclusions as a pretty-printed top-level JSON array with a trailing newline.
 *
 * @param {string} fullPath - Destination file path.
 * @param {Array} exclusions - Entries to serialize.
 */
function writeCanonicalExclusions(fullPath, exclusions) {
  fs.writeFileSync(fullPath, JSON.stringify(exclusions, null, 2) + '\n', 'utf8');
}

/**
 * Format a file modification time for log output.
 *
 * @param {number} mtimeMs - Modification time in milliseconds since the epoch.
 * @returns {string} ISO-8601 timestamp, or `'unknown'` when the input is not a finite number.
 */
function formatFileTime(mtimeMs) {
  if (!Number.isFinite(mtimeMs)) return 'unknown';
  return new Date(mtimeMs).toISOString();
}

/**
 * Trim a string value; any non-string value becomes the empty string.
 *
 * @param {*} value - Value to clean.
 * @returns {string} The trimmed string, or `''`.
 */
function cleanText(value) {
  return typeof value === 'string' ? value.trim() : '';
}

/**
 * Normalize text for fuzzy comparison: NFKC, lower-case, and every run of
 * punctuation / symbols / whitespace collapsed to a single space.
 *
 * @param {*} value - Value to normalize (non-strings yield `''`).
 * @returns {string} The normalized text.
 */
function normalizeText(value) {
  return cleanText(value)
    .normalize('NFKC')
    .toLowerCase()
    // Each separator run becomes exactly one space, so no second whitespace pass is needed.
    .replace(/[\p{P}\p{S}\s]+/gu, ' ')
    .trim();
}

/**
 * Build a compact grouping key: NFKC with all punctuation, symbols and
 * whitespace removed. Case is preserved.
 *
 * @param {*} value - Value to convert (non-strings yield `''`).
 * @returns {string} The key text.
 */
function toKeyText(value) {
  return cleanText(value)
    .normalize('NFKC')
    .replace(/[\p{P}\p{S}\s]+/gu, '');
}

/**
 * Pick the descriptive text of an exclusion entry: the first non-empty field among
 * `original_finding`, `title`, `suggestion`, `reason` and `note`.
 *
 * @param {object} exclusion - Raw exclusion entry.
 * @returns {string} The chosen text, or `''` when none of the fields is set.
 */
function getExclusionText(exclusion) {
  return cleanText(exclusion?.original_finding)
    || cleanText(exclusion?.title)
    || cleanText(exclusion?.suggestion)
    || cleanText(exclusion?.reason)
    || cleanText(exclusion?.note);
}

/**
 * Enrich a raw exclusion entry with the derived fields used for matching and dedup.
 *
 * @param {object} exclusion - Raw exclusion entry.
 * @param {number} index - Position in the source list; keeps text-less entries distinct.
 * @returns {object} The entry plus `location`, `filePath`, `role`, `text`, `textKey`
 *   and `fingerprint`.
 */
function normalizeExclusionEntry(exclusion, index) {
  const location = cleanText(exclusion?.location);
  // Only the part before the first ':' (the file path) is kept; the line number is dropped.
  const filePath = location ? location.split(':')[0] : '';
  const role = cleanText(exclusion?.role);
  const text = getExclusionText(exclusion);
  const textKey = toKeyText(text);
  // '*' marks "any path" / "any role"; entries without text get a per-index
  // placeholder so they are never merged with each other during dedup.
  const fingerprint = [filePath || '*', role || '*', textKey || `entry-${index + 1}`].join('|');
  return {
    ...exclusion,
    location: location || null,
    filePath,
    role: role || null,
    text,
    textKey,
    fingerprint,
  };
}

/**
 * Drop exclusions whose fingerprint was already seen, keeping the first occurrence.
 *
 * @param {object[]} exclusions - Normalized exclusion entries.
 * @returns {object[]} The entries with unique fingerprints, in original order.
 */
function dedupeExclusions(exclusions) {
  const seen = new Set();
  return exclusions.filter(exclusion => {
    if (seen.has(exclusion.fingerprint)) return false;
    seen.add(exclusion.fingerprint);
    return true;
  });
}

/**
 * Group normalized exclusions by text key (falling back to the fingerprint) so the
 * LLM prompt can describe them per category.
 *
 * @param {object[]} exclusions - Normalized, de-duplicated exclusion entries.
 * @returns {{ text: string, count: number, paths: string[], roles: string[], samples: string[] }[]}
 *   Groups sorted by count (desc), then number of paths (desc), then text.
 */
function groupExclusionsForAI(exclusions) {
  const groups = new Map();
  for (const exclusion of exclusions) {
    const groupKey = exclusion.textKey || exclusion.fingerprint;
    if (!groups.has(groupKey)) {
      groups.set(groupKey, {
        key: groupKey,
        text: exclusion.text || exclusion.location || exclusion.fingerprint,
        count: 0,
        paths: new Set(),
        roles: new Set(),
        samples: [],
      });
    }
    const group = groups.get(groupKey);
    group.count += 1;
    if (exclusion.filePath) group.paths.add(exclusion.filePath);
    if (exclusion.role) group.roles.add(exclusion.role);
    // At most two sample texts are kept per group.
    if (group.samples.length < 2 && exclusion.text) group.samples.push(exclusion.text);
  }
  return [...groups.values()]
    .sort((a, b) => b.count - a.count || b.paths.size - a.paths.size || a.text.localeCompare(b.text))
    .map(group => ({
      text: group.text,
      count: group.count,
      paths: [...group.paths].sort(),
      roles: [...group.roles].sort(),
      samples: group.samples,
    }));
}

/**
 * Summarize the exclusions into grouped statistics and a prompt fragment for the LLM.
 *
 * Only the first 12 groups are expanded (each with up to 4 paths, 3 roles and
 * 2 samples); the number of remaining groups is mentioned in a trailing line.
 *
 * @param {object[]} exclusions - Raw or already-normalized exclusion entries.
 * @returns {{ rawCount: number, uniqueCount: number, groupCount: number, groups: object[], prompt: string }}
 *   The summary; `prompt` is `''` when there are no exclusions.
 */
function buildExclusionContext(exclusions) {
  if (exclusions.length === 0) {
    return {
      rawCount: 0,
      uniqueCount: 0,
      // Present in the empty case too, so callers never read `undefined`.
      groupCount: 0,
      groups: [],
      prompt: '',
    };
  }

  const normalized = exclusions.map((exclusion, index) => normalizeExclusionEntry(exclusion, index));
  const unique = dedupeExclusions(normalized);
  const groups = groupExclusionsForAI(unique);
  const topGroups = groups.slice(0, 12).map(group => ({
    text: group.text,
    count: group.count,
    paths: group.paths.slice(0, 4),
    roles: group.roles.slice(0, 3),
    samples: group.samples.slice(0, 2),
  }));
  const omitted = groups.length - topGroups.length;

  const promptLines = [
    `已知誤報清單(原始 ${exclusions.length} 筆,整理後 ${unique.length} 筆,分成 ${groups.length} 類):`,
    ...topGroups.map((group, index) => {
      const parts = [
        `${index + 1}. ${group.text}`,
        `count=${group.count}`,
      ];
      if (group.paths.length > 0) parts.push(`paths=${group.paths.join(', ')}`);
      if (group.roles.length > 0) parts.push(`roles=${group.roles.join(', ')}`);
      if (group.samples.length > 0) parts.push(`samples=${group.samples.join(' | ')}`);
      return `- ${parts.join(' ; ')}`;
    }),
  ];
  if (omitted > 0) {
    promptLines.push(`- 另有 ${omitted} 類相似排除條目未展開,請依上述群組規則推論。`);
  }

  return {
    rawCount: exclusions.length,
    uniqueCount: unique.length,
    groupCount: groups.length,
    groups: topGroups,
    prompt: promptLines.join('\n'),
  };
}

/**
 * Load previously recorded findings from `FINDINGS_PATH` inside the workspace.
 *
 * @param {string} workspace - Root directory that contains the findings file.
 * @returns {object[]} The old findings, each tagged with `is_new: false`;
 *   empty when the file is missing or unreadable.
 */
export function loadOldFindings(workspace) {
  const fullPath = path.join(workspace, FINDINGS_PATH);
  const old = readJSONArray(fullPath, '舊 findings ').map(f => ({ ...f, is_new: false }));
  if (fs.existsSync(fullPath)) {
    const stat = fs.statSync(fullPath);
    line(`讀取舊 findings 檔案: ${fullPath}`);
    line(`舊 findings 檔案資訊: bytes=${stat.size} mtime=${formatFileTime(stat.mtimeMs)} path=${path.relative(workspace, fullPath) || fullPath}`);
  } else {
    warn(`舊 findings 檔案不存在: ${fullPath}`);
  }
  ok(`讀取舊 findings: ${old.length} 筆`);
  return old;
}

/**
 * Merge old and new findings, dropping new ones whose key
 * (role + location + first 50 characters of the suggestion) already exists.
 *
 * @param {object[]} oldFindings - Findings kept as-is, listed first.
 * @param {object[]} newFindings - Candidate findings to append.
 * @returns {object[]} Old findings followed by the de-duplicated new findings.
 */
export function mergeFindings(oldFindings, newFindings) {
  const key = f => `${f.role}|${f.location}|${String(f.suggestion).slice(0, 50)}`;
  const seen = new Set(oldFindings.map(key));
  const deduped = newFindings.filter(f => {
    const k = key(f);
    if (seen.has(k)) return false;
    seen.add(k);
    return true;
  });
  const merged = [...oldFindings, ...deduped];
  ok(`合併結果: 舊=${oldFindings.length} 新(去重後)=${deduped.length} 總計=${merged.length}`);
  return merged;
}

/**
 * Return a copy of the findings sorted by severity (critical > warning > info).
 * Findings with an unknown or missing level are placed after all known levels.
 *
 * @param {object[]} findings - Findings to sort; the input array is not mutated.
 * @returns {object[]} A new, sorted array.
 */
export function sortByLevel(findings) {
  // indexOf yields -1 for unknown levels, which would sort them ahead of "critical".
  const rank = level => {
    const index = LEVELS.indexOf(level);
    return index === -1 ? LEVELS.length : index;
  };
  return [...findings].sort((a, b) => rank(a.level) - rank(b.level));
}

/**
 * Shared degradation path for failed AI calls: log a warning and keep every finding.
 *
 * @param {string} label - Name of the failed step, used in the warning.
 * @param {object[]} findings - Findings to return unchanged.
 * @param {Error} e - The failure; `e.response.status` 402 / 429 is reported as quota / rate limit.
 * @returns {object[]} The `findings` argument, unchanged.
 */
function fallback(label, findings, e) {
  const status = e.response?.status;
  const reason = (status === 402 || status === 429) ? `${status} 額度/限流` : e.message;
  warn(`${label}失敗(${reason}),降級:保留所有問題`);
  return findings;
}

/**
 * Keep only the fields the AI needs, to reduce token usage.
 *
 * @param {object[]} findings - Full findings.
 * @returns {{ level: *, role: *, location: *, suggestion: * }[]} Trimmed copies.
 */
function toAIPayload(findings) {
  return findings.map(({ level, role, location, suggestion }) => ({ level, role, location, suggestion }));
}

/**
 * Map AI-returned items back to the original findings so that fields stripped by
 * `toAIPayload` (such as `is_new`) are restored. Items are matched on
 * location + first 50 characters of the suggestion; unmatched items are returned as-is.
 *
 * @param {object[]} findings - The findings that were sent to the AI.
 * @param {object[]} result - The items returned by the AI.
 * @returns {object[]} One entry per item in `result`.
 */
function restoreOriginalFindings(findings, result) {
  const keyOf = f => `${f.location}|${String(f.suggestion).slice(0, 50)}`;
  const origMap = new Map(findings.map(f => [keyOf(f), f]));
  return result.map(r => origMap.get(keyOf(r)) ?? r);
}

/**
 * Ask the LLM to remove semantically duplicated findings.
 * Any failure (including an empty or non-array answer) degrades to returning the input.
 *
 * @param {object[]} findings - Findings to de-duplicate.
 * @returns {Promise<object[]>} The de-duplicated findings, or the original list on failure.
 */
export async function deduplicateWithAI(findings) {
  if (findings.length === 0) return findings;
  const systemPrompt = `移除語意重複的程式碼審查問題(JSON 陣列)。保留等級較高者(critical > warning > info)。只回傳去重後的 JSON 陣列。`;
  try {
    const result = await chatJSON(systemPrompt, JSON.stringify(toAIPayload(findings)));
    if (Array.isArray(result) && result.length > 0) {
      ok(`AI 去重: ${findings.length} -> ${result.length} 筆`);
      return restoreOriginalFindings(findings, result);
    }
    throw new Error('AI 回傳空陣列');
  } catch (e) {
    return fallback('AI 去重', findings, e);
  }
}

/**
 * Load the exclusion list from `EXCLUSIONS_PATH` inside the workspace.
 *
 * When the file is not already a top-level array it is rewritten in that canonical
 * form (and mirrored into `mirrorWorkspace` when that is a different directory).
 *
 * @param {string} workspace - Root directory that contains the exclusions file.
 * @param {object|null} [repoState] - Optional repo info (`branch`, `shortSha`, `headSha`,
 *   `commitTime`) used only for logging.
 * @param {string|null} [mirrorWorkspace] - Optional second workspace that receives the
 *   canonical file as well.
 * @returns {object[]} Normalized, de-duplicated exclusions; empty when the file is
 *   missing or cannot be read/parsed.
 */
export function loadExclusions(workspace, repoState = null, mirrorWorkspace = null) {
  const fullPath = path.join(workspace, EXCLUSIONS_PATH);
  const describeRepoState = () => {
    const branch = repoState?.branch || 'detached';
    const shortSha = repoState?.shortSha || repoState?.headSha || 'unknown';
    const commitTime = repoState?.commitTime || 'unknown';
    return `來源分支狀態: branch=${branch} commit=${shortSha} commit_time=${commitTime}`;
  };

  if (!fs.existsSync(fullPath)) {
    warn(`排除問題檔案不存在,視為空: ${fullPath}`);
    if (repoState) line(describeRepoState());
    ok('讀取排除問題: raw=0 normalized=0 筆');
    return [];
  }

  let exclusions = [];
  let rawCount = 0;
  try {
    const stat = fs.statSync(fullPath);
    const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
    const sourceFormat = detectExclusionSource(data);
    const normalizedSource = normalizeExclusions(data);
    rawCount = normalizedSource.length;
    exclusions = dedupeExclusions(
      normalizedSource.map((exclusion, index) => normalizeExclusionEntry(exclusion, index)),
    );

    line(`讀取排除問題檔案: ${fullPath}`);
    line(describeRepoState());
    line(`檔案資訊: bytes=${stat.size} mtime=${formatFileTime(stat.mtimeMs)} raw=${rawCount} normalized=${exclusions.length} path=${path.relative(workspace, fullPath) || fullPath}`);

    if (sourceFormat !== 'array') {
      // NOTE(review): an 'unknown' shape is rewritten as `[]`, which discards the
      // original file content — confirm this reset is intended.
      try {
        writeCanonicalExclusions(fullPath, normalizedSource);
        if (mirrorWorkspace && path.resolve(mirrorWorkspace) !== path.resolve(workspace)) {
          const mirrorPath = path.join(mirrorWorkspace, EXCLUSIONS_PATH);
          fs.mkdirSync(path.dirname(mirrorPath), { recursive: true });
          writeCanonicalExclusions(mirrorPath, normalizedSource);
        }
        line(`排除問題格式已修正為頂層陣列: source=${sourceFormat} -> array`);
      } catch (writeErr) {
        // A failed rewrite must not throw away exclusions that were parsed successfully.
        warn(`排除問題格式修正寫入失敗: ${writeErr.message},沿用已讀取的內容`);
      }
    }
  } catch (e) {
    warn(`讀取排除問題失敗: ${e.message},視為空: ${fullPath}`);
    exclusions = [];
    rawCount = 0;
  }

  const summary = buildExclusionContext(exclusions);
  ok(`讀取排除問題: raw=${rawCount} normalized=${exclusions.length} groups=${summary.groupCount} 筆`);
  return exclusions;
}

/**
 * Remove the findings that match an exclusion rule.
 *
 * Matching rules:
 * - Locations are compared on the file path only (the part before the first ':').
 * - A rule with a path and/or role excludes every finding that matches those fields;
 *   its text is not consulted.
 *   NOTE(review): the original header said an omitted suggestion acts as a wildcard,
 *   which suggests a present text should narrow the match — confirm the intended behavior.
 * - A rule with text only excludes findings whose normalized text contains, or is
 *   contained in, the rule text.
 * - A rule with no path, role or text is ignored rather than matching everything.
 *
 * @param {object[]} findings - Findings to filter.
 * @param {object[]} exclusions - Raw or normalized exclusion entries.
 * @returns {object[]} The findings that are not excluded (the same array when
 *   `exclusions` is empty).
 */
export function applyExclusions(findings, exclusions) {
  if (exclusions.length === 0) return findings;

  // Derive each rule once instead of once per (finding, exclusion) pair.
  const rules = exclusions
    .map(ex => ({
      filePath: ex?.filePath || (ex?.location ? String(ex.location).split(':')[0] : null),
      role: ex?.role || null,
      // Use the same normalization as the finding text; `textKey` strips whitespace
      // and keeps case, so it is not comparable with `normalizeText` output.
      text: normalizeText(ex?.text || ex?.suggestion || ex?.title || ''),
    }))
    .filter(rule => rule.filePath || rule.role || rule.text);

  const isExcluded = finding => {
    const findingPath = String(finding.location).split(':')[0];
    const findingText = normalizeText(finding.suggestion || finding.title || '');
    return rules.some(rule => {
      if (rule.filePath && rule.filePath !== findingPath) return false;
      if (rule.role && rule.role !== finding.role) return false;
      if (rule.filePath || rule.role) return true;
      // Text-only rule: both sides must carry text for a similarity match.
      return findingText !== ''
        && (findingText.includes(rule.text) || rule.text.includes(findingText));
    });
  };

  const before = findings.length;
  const filtered = findings.filter(f => !isExcluded(f));
  ok(`排除過濾: ${before} -> ${filtered.length} 筆(排除 ${before - filtered.length} 筆)`);
  return filtered;
}

/**
 * Ask the LLM which findings are false positives or not applicable and keep the rest.
 * Any failure (including an empty or non-array answer) degrades to returning the input.
 *
 * @param {object[]} findings - Findings to review.
 * @param {object[]} [exclusions] - Known false positives, summarized into the prompt as hints.
 * @param {Function} [chatFn] - Chat function called as `chatFn(systemPrompt, userMessage)`;
 *   defaults to `chatJSON`.
 * @returns {Promise<object[]>} The findings to keep, or the original list on failure.
 */
export async function filterFalsePositivesWithAI(findings, exclusions = [], chatFn = chatJSON) {
  if (findings.length === 0) return findings;
  const exclusionContext = buildExclusionContext(exclusions);
  const exclusionHint = exclusionContext.prompt
    ? `\n${exclusionContext.prompt}\n規則:若 finding 與上述任何一類的路徑、角色或描述高度相似,優先視為誤報或不適用。`
    : '';
  const systemPrompt = `判斷以下程式碼審查問題是否為誤報或不適用(如已正確使用 secrets、CI/CD 必要權限等),移除後只回傳需保留的 JSON 陣列。${exclusionHint}`;
  try {
    const result = await chatFn(systemPrompt, JSON.stringify(toAIPayload(findings)));
    if (Array.isArray(result) && result.length > 0) {
      ok(`AI 誤報過濾: ${findings.length} -> ${result.length} 筆`);
      return restoreOriginalFindings(findings, result);
    }
    throw new Error('AI 回傳空陣列或非陣列');
  } catch (e) {
    return fallback('AI 誤報過濾', findings, e);
  }
}