// code-review/app/findings.js

import fs from 'fs';
import path from 'path';
import { chatJSON } from './llm.js';
import { FINDINGS_PATH, EXCLUSIONS_PATH } from './config.js';
import { line, ok, warn } from './log.js';

// Severity levels, listed from most to least severe; sortByLevel orders findings by their index in this list.
const LEVELS = ['critical', 'warning', 'info'];

/**
 * Analyse a diff with a single reviewer role and return its findings.
 *
 * Only entries that carry `level`, `role`, `location` and `suggestion` are
 * kept; each kept entry is tagged with `is_new: true`.
 *
 * @param {{name: string, system_prompt: string}} role - Reviewer role; `name` is used for logging,
 *   `system_prompt` is passed to the LLM as the system prompt.
 * @param {string} diff - Git diff text to analyse.
 * @returns {Promise<object[]>} Valid findings (possibly empty).
 * @throws Whatever `chatJSON` throws — LLM errors are not caught here.
 */
export async function analyzeWithRole(role, diff) {
  line(`[${role.name}] 開始分析`);
  const response = await chatJSON(role.system_prompt, `以下是 Git Diff 內容：\n\n${diff}`);

  // The LLM may answer with an object or a scalar instead of an array; treat that as "no findings"
  // rather than crashing on `.filter`.
  if (!Array.isArray(response)) {
    warn(`[${role.name}] AI 回傳非陣列，視為 0 個問題`);
  }
  const candidates = Array.isArray(response) ? response : [];

  const valid = candidates
    // `f &&` guards against null / undefined entries in the returned array.
    .filter(f => f && f.level && f.role && f.location && f.suggestion)
    .map(f => ({ ...f, is_new: true }));
  ok(`[${role.name}] 找到 ${valid.length} 個問題`);
  return valid;
}

/**
 * Load a file that is expected to hold a JSON array.
 *
 * Returns an empty array when the file is missing, cannot be read or parsed,
 * or when the parsed value is not an array. Missing files and read/parse
 * failures are reported through `warn`.
 *
 * @param {string} fullPath - Path of the file to read.
 * @param {string} label - Label prefixed to the warning messages.
 * @returns {Array} The parsed array, or `[]`.
 */
function readJSONArray(fullPath, label) {
  const exists = fs.existsSync(fullPath);
  if (exists) {
    let parsed;
    try {
      const raw = fs.readFileSync(fullPath, 'utf8');
      parsed = JSON.parse(raw);
    } catch (err) {
      warn(`讀取${label}失敗: ${err.message}，視為空`);
      return [];
    }
    // A valid JSON document that is not an array is silently treated as empty.
    if (Array.isArray(parsed)) return parsed;
    return [];
  }

  warn(`${label}檔案不存在，視為空`);
  return [];
}

/**
 * Extract the list of exclusion entries from parsed exclusion-file content.
 *
 * Accepts either a bare array or an object with an `excluded_findings` array;
 * anything else yields an empty array.
 *
 * @param {*} data - Parsed JSON content.
 * @returns {Array} The exclusion entries.
 */
function normalizeExclusions(data) {
  // Checked in order: the value itself first, then its `excluded_findings` property.
  const candidates = [data, data?.excluded_findings];
  const firstArray = candidates.find(candidate => Array.isArray(candidate));
  return firstArray ?? [];
}

/**
 * Format a millisecond timestamp as an ISO-8601 string for log output.
 *
 * @param {number} mtimeMs - Timestamp in milliseconds (e.g. `fs.Stats#mtimeMs`).
 * @returns {string} ISO string, or `'unknown'` when the value is not a finite
 *   number or is outside the range a `Date` can represent.
 */
function formatFileTime(mtimeMs) {
  if (!Number.isFinite(mtimeMs)) return 'unknown';
  const date = new Date(mtimeMs);
  // A finite value beyond ±8.64e15 ms yields an invalid Date, and toISOString() would throw RangeError.
  return Number.isNaN(date.getTime()) ? 'unknown' : date.toISOString();
}

/**
 * Return the trimmed string, or an empty string for any non-string value.
 *
 * @param {*} value - Value to clean.
 * @returns {string} Trimmed text or `''`.
 */
function cleanText(value) {
  if (typeof value !== 'string') return '';
  return value.trim();
}

/**
 * Normalise text for loose comparison: NFKC-normalise, lowercase, and turn
 * every run of punctuation, symbols or whitespace into a single space.
 *
 * @param {*} value - Text to normalise; non-strings become `''`.
 * @returns {string} Lowercased, space-separated text with no leading/trailing space.
 */
function normalizeText(value) {
  const folded = cleanText(value).normalize('NFKC').toLowerCase();
  const spaced = folded.replace(/[\p{P}\p{S}\s]+/gu, ' ');
  const collapsed = spaced.replace(/\s+/g, ' ');
  return collapsed.trim();
}

/**
 * Build a compact key from text: NFKC-normalise and strip every punctuation,
 * symbol and whitespace character. Letter case is preserved.
 *
 * @param {*} value - Text to convert; non-strings become `''`.
 * @returns {string} Text with punctuation, symbols and whitespace removed.
 */
function toKeyText(value) {
  const canonical = cleanText(value).normalize('NFKC');
  const stripped = canonical.replace(/[\p{P}\p{S}\s]+/gu, '');
  return stripped.trim();
}

/**
 * Pick the descriptive text of an exclusion entry.
 *
 * Fields are tried in priority order (`original_finding`, `title`,
 * `suggestion`, `reason`, `note`); the first one that is a non-blank string
 * wins.
 *
 * @param {object|null|undefined} exclusion - Exclusion entry.
 * @returns {string} Trimmed text, or `''` when no field has usable text.
 */
function getExclusionText(exclusion) {
  const fieldsByPriority = ['original_finding', 'title', 'suggestion', 'reason', 'note'];
  for (const field of fieldsByPriority) {
    const candidate = cleanText(exclusion?.[field]);
    if (candidate) return candidate;
  }
  return '';
}

/**
 * Produce a normalised copy of an exclusion entry with derived lookup fields.
 *
 * Added / overwritten fields:
 * - `location`: trimmed location, or `null` when blank
 * - `filePath`: the part of `location` before the first `:` (`''` when there is no location)
 * - `role`: trimmed role, or `null` when blank
 * - `text`: descriptive text chosen by `getExclusionText`
 * - `textKey`: `text` passed through `toKeyText`
 * - `fingerprint`: `filePath|role|textKey`, with `*` for a missing path/role and
 *   `entry-<index + 1>` when there is no text key
 *
 * @param {object|null|undefined} exclusion - Raw exclusion entry.
 * @param {number} index - Zero-based position of the entry in its list.
 * @returns {object} Normalised entry (all original properties are kept).
 */
function normalizeExclusionEntry(exclusion, index) {
  const location = cleanText(exclusion?.location);
  const role = cleanText(exclusion?.role);
  const text = getExclusionText(exclusion);
  const textKey = toKeyText(text);

  const [filePath] = location ? location.split(':') : [''];

  // Entries without text fall back to a position-based token so they stay distinct.
  const textToken = textKey || `entry-${index + 1}`;
  const fingerprint = `${filePath || '*'}|${role || '*'}|${textToken}`;

  return Object.assign({}, exclusion, {
    location: location || null,
    filePath,
    role: role || null,
    text,
    textKey,
    fingerprint,
  });
}

/**
 * Drop exclusion entries whose `fingerprint` has already been seen,
 * keeping the first occurrence and preserving order.
 *
 * @param {Array<{fingerprint: string}>} exclusions - Normalised exclusion entries.
 * @returns {Array} Entries with unique fingerprints.
 */
function dedupeExclusions(exclusions) {
  const knownFingerprints = new Set();
  const unique = [];
  for (const entry of exclusions) {
    if (knownFingerprints.has(entry.fingerprint)) continue;
    knownFingerprints.add(entry.fingerprint);
    unique.push(entry);
  }
  return unique;
}

/**
 * Group normalised exclusion entries by text key for presentation to the LLM.
 *
 * Entries sharing a `textKey` (or, when that is empty, a `fingerprint`) fall
 * into the same group. Each group records how many entries it holds, the
 * distinct file paths and roles involved, and up to two sample texts.
 *
 * Groups are ordered by entry count (descending), then by number of distinct
 * paths (descending), then by text via `localeCompare`.
 *
 * @param {object[]} exclusions - Entries produced by `normalizeExclusionEntry`.
 * @returns {Array<{text: string, count: number, paths: string[], roles: string[], samples: string[]}>}
 */
function groupExclusionsForAI(exclusions) {
  const buckets = new Map();

  exclusions.forEach(entry => {
    const bucketKey = entry.textKey || entry.fingerprint;
    let bucket = buckets.get(bucketKey);
    if (bucket === undefined) {
      bucket = {
        key: bucketKey,
        text: entry.text || entry.location || entry.fingerprint,
        count: 0,
        paths: new Set(),
        roles: new Set(),
        samples: [],
      };
      buckets.set(bucketKey, bucket);
    }

    bucket.count += 1;
    if (entry.filePath) bucket.paths.add(entry.filePath);
    if (entry.role) bucket.roles.add(entry.role);
    // Keep at most two sample texts per group.
    if (entry.text && bucket.samples.length < 2) bucket.samples.push(entry.text);
  });

  const byPriority = (a, b) => {
    if (a.count !== b.count) return b.count - a.count;
    if (a.paths.size !== b.paths.size) return b.paths.size - a.paths.size;
    return a.text.localeCompare(b.text);
  };

  return Array.from(buckets.values())
    .sort(byPriority)
    .map(({ text, count, paths, roles, samples }) => ({
      text,
      count,
      paths: Array.from(paths).sort(),
      roles: Array.from(roles).sort(),
      samples,
    }));
}

/**
 * Summarise exclusion entries into a compact context for an LLM prompt.
 *
 * Entries are normalised, de-duplicated by fingerprint and grouped; at most
 * the first 12 groups are rendered, each limited to 4 paths, 3 roles and
 * 2 samples. When groups are left out, a trailing line states how many.
 *
 * @param {object[]} exclusions - Raw or already-normalised exclusion entries.
 * @returns {{rawCount: number, uniqueCount: number, groupCount: number, groups: object[], prompt: string}}
 *   Counts, the rendered groups and the prompt text (`''` when there are no exclusions).
 */
function buildExclusionContext(exclusions) {
  // The empty result carries the same keys as the populated one (including groupCount),
  // so callers can read every field unconditionally.
  if (!Array.isArray(exclusions) || exclusions.length === 0) {
    return {
      rawCount: 0,
      uniqueCount: 0,
      groupCount: 0,
      groups: [],
      prompt: '',
    };
  }

  const normalized = exclusions.map((exclusion, index) => normalizeExclusionEntry(exclusion, index));
  const unique = dedupeExclusions(normalized);
  const groups = groupExclusionsForAI(unique);

  // Cap what is sent to the model to keep the prompt small.
  const topGroups = groups.slice(0, 12).map(group => ({
    text: group.text,
    count: group.count,
    paths: group.paths.slice(0, 4),
    roles: group.roles.slice(0, 3),
    samples: group.samples.slice(0, 2),
  }));
  const omitted = groups.length - topGroups.length;

  const promptLines = [
    `已知誤報清單（原始 ${exclusions.length} 筆，整理後 ${unique.length} 筆，分成 ${groups.length} 類）:`,
    ...topGroups.map((group, index) => {
      const parts = [
        `${index + 1}. ${group.text}`,
        `count=${group.count}`,
      ];
      if (group.paths.length > 0) parts.push(`paths=${group.paths.join(', ')}`);
      if (group.roles.length > 0) parts.push(`roles=${group.roles.join(', ')}`);
      if (group.samples.length > 0) parts.push(`samples=${group.samples.join(' | ')}`);
      return `- ${parts.join(' ; ')}`;
    }),
  ];
  if (omitted > 0) {
    promptLines.push(`- 另有 ${omitted} 類相似排除條目未展開，請依上述群組規則推論。`);
  }

  return {
    rawCount: exclusions.length,
    uniqueCount: unique.length,
    groupCount: groups.length,
    groups: topGroups,
    prompt: promptLines.join('\n'),
  };
}

/**
 * Load previously recorded findings from `FINDINGS_PATH` inside the workspace.
 *
 * Every loaded finding is copied and marked `is_new: false`. File details are
 * logged when the file exists; a warning is logged when it does not.
 *
 * @param {string} workspace - Directory that `FINDINGS_PATH` is resolved against.
 * @returns {object[]} Old findings (empty when the file is missing or unreadable).
 */
export function loadOldFindings(workspace) {
  const fullPath = path.join(workspace, FINDINGS_PATH);

  const old = [];
  for (const finding of readJSONArray(fullPath, '舊 findings ')) {
    old.push({ ...finding, is_new: false });
  }

  if (!fs.existsSync(fullPath)) {
    warn(`舊 findings 檔案不存在: ${fullPath}`);
  } else {
    const { size, mtimeMs } = fs.statSync(fullPath);
    // Prefer the workspace-relative path in the log; fall back to the absolute one if it is empty.
    const shownPath = path.relative(workspace, fullPath) || fullPath;
    line(`讀取舊 findings 檔案: ${fullPath}`);
    line(`舊 findings 檔案資訊: bytes=${size} mtime=${formatFileTime(mtimeMs)} path=${shownPath}`);
  }

  ok(`讀取舊 findings: ${old.length} 筆`);
  return old;
}

/**
 * Merge old and new findings, dropping new findings that duplicate an
 * existing one.
 *
 * Two findings are considered duplicates when they share role, location and
 * the first 50 characters of the suggestion. Old findings are always kept
 * as-is and come first in the result.
 *
 * @param {object[]} oldFindings - Findings loaded from a previous run.
 * @param {object[]} newFindings - Findings produced by the current run.
 * @returns {object[]} Old findings followed by the non-duplicate new ones.
 */
export function mergeFindings(oldFindings, newFindings) {
  const fingerprintOf = finding => {
    const head = String(finding.suggestion).slice(0, 50);
    return `${finding.role}|${finding.location}|${head}`;
  };

  const known = new Set();
  for (const finding of oldFindings) {
    known.add(fingerprintOf(finding));
  }

  const fresh = [];
  for (const candidate of newFindings) {
    const fingerprint = fingerprintOf(candidate);
    if (known.has(fingerprint)) continue;
    // Remember it so later duplicates within the new batch are dropped too.
    known.add(fingerprint);
    fresh.push(candidate);
  }

  const merged = [...oldFindings, ...fresh];
  ok(`合併結果: 舊=${oldFindings.length} 新(去重後)=${fresh.length} 總計=${merged.length}`);
  return merged;
}

/**
 * Return a copy of the findings sorted by severity
 * (critical > warning > info).
 *
 * Findings whose `level` is missing or not listed in `LEVELS` are placed
 * after all known levels instead of ahead of `critical`. The input array is
 * not modified, and findings of equal rank keep their relative order.
 *
 * @param {object[]} findings - Findings to sort.
 * @returns {object[]} New array ordered by severity.
 */
export function sortByLevel(findings) {
  const rankOf = finding => {
    const index = LEVELS.indexOf(finding?.level);
    // indexOf yields -1 for unknown levels, which would otherwise sort before 'critical'.
    return index === -1 ? LEVELS.length : index;
  };
  return [...findings].sort((a, b) => rankOf(a) - rankOf(b));
}

/**
 * Shared degradation path for failed AI calls: log a warning and hand the
 * findings back untouched.
 *
 * HTTP status 402 or 429 on `e.response` is reported as a quota / rate-limit
 * problem; any other failure is reported with `e.message`.
 *
 * @param {string} label - Name of the step that failed (used in the warning).
 * @param {object[]} findings - Findings to return unchanged.
 * @param {{message?: string, response?: {status?: number}}} e - The caught error.
 * @returns {object[]} The same `findings` array.
 */
function fallback(label, findings, e) {
  const status = e.response?.status;
  let reason = e.message;
  if (status === 402 || status === 429) {
    reason = `${status} 額度/限流`;
  }
  warn(`${label}失敗（${reason}），降級：保留所有問題`);
  return findings;
}

/**
 * Reduce findings to the fields sent to the AI (`level`, `role`, `location`,
 * `suggestion`) to cut token usage.
 *
 * @param {object[]} findings - Full findings.
 * @returns {Array<{level: *, role: *, location: *, suggestion: *}>} Slimmed copies.
 */
function toAIPayload(findings) {
  return findings.map(finding => ({
    level: finding.level,
    role: finding.role,
    location: finding.location,
    suggestion: finding.suggestion,
  }));
}

/**
 * Ask the LLM to remove semantically duplicate findings.
 *
 * The model only sees a slimmed payload, so each returned item is mapped
 * back to the original finding to restore its full fields (including
 * `is_new`). If the call fails, or the model returns a non-array or an empty
 * array, the original findings are returned unchanged via `fallback`.
 *
 * @param {object[]} findings - Findings to de-duplicate.
 * @returns {Promise<object[]>} De-duplicated findings, or the input on failure.
 */
export async function deduplicateWithAI(findings) {
  if (findings.length === 0) return findings;

  const systemPrompt = `移除語意重複的程式碼審查問題（JSON 陣列）。保留等級較高者（critical > warning > info）。只回傳去重後的 JSON 陣列。`;

  try {
    const result = await chatJSON(systemPrompt, JSON.stringify(toAIPayload(findings)));
    if (!Array.isArray(result) || result.length === 0) {
      throw new Error('AI 回傳空陣列');
    }
    ok(`AI 去重: ${findings.length} -> ${result.length} 筆`);

    const looseKey = f => `${f.location}|${String(f.suggestion).slice(0, 50)}`;
    const exactKey = f => `${f.role}|${f.level}|${looseKey(f)}`;
    const byLooseKey = new Map(findings.map(f => [looseKey(f), f]));
    const byExactKey = new Map(findings.map(f => [exactKey(f), f]));

    // Prefer a match that also agrees on role and level: findings from different roles can share
    // location + suggestion, and the loose key alone would hand back whichever came last —
    // possibly the lower-level duplicate the model just discarded.
    // Items that match no original finding are passed through as returned by the model.
    return result.map(r => byExactKey.get(exactKey(r)) ?? byLooseKey.get(looseKey(r)) ?? r);
  } catch (e) {
    return fallback('AI 去重', findings, e);
  }
}

/**
 * Load and normalise the exclusion list from `EXCLUSIONS_PATH` inside the workspace.
 *
 * The file may contain either a bare array or an object with an
 * `excluded_findings` array. Entries are normalised and de-duplicated by
 * fingerprint. A missing or unreadable file yields an empty list (with a
 * warning) rather than an error.
 *
 * @param {string} workspace - Directory that `EXCLUSIONS_PATH` is resolved against.
 * @param {{branch?: string, shortSha?: string, headSha?: string, commitTime?: string}|null} [repoState=null]
 *   Optional source-branch information, used only for logging.
 * @returns {object[]} Normalised, de-duplicated exclusion entries.
 */
export function loadExclusions(workspace, repoState = null) {
  const fullPath = path.join(workspace, EXCLUSIONS_PATH);

  const branch = repoState?.branch || 'detached';
  const shortSha = repoState?.shortSha || repoState?.headSha || 'unknown';
  const commitTime = repoState?.commitTime || 'unknown';
  const branchState = `來源分支狀態: branch=${branch} commit=${shortSha} commit_time=${commitTime}`;

  if (!fs.existsSync(fullPath)) {
    warn(`排除問題檔案不存在，視為空: ${fullPath}`);
    // In the missing-file case the branch state is only logged when the caller supplied it.
    if (repoState) line(branchState);
    ok('讀取排除問題: raw=0 normalized=0 筆');
    return [];
  }

  let exclusions = [];
  let rawCount = 0;
  try {
    const stat = fs.statSync(fullPath);
    const rawEntries = normalizeExclusions(JSON.parse(fs.readFileSync(fullPath, 'utf8')));
    const normalized = dedupeExclusions(
      rawEntries.map((exclusion, index) => normalizeExclusionEntry(exclusion, index)),
    );
    // Commit both values only after normalisation succeeded so the counts stay consistent.
    rawCount = rawEntries.length;
    exclusions = normalized;
    line(`讀取排除問題檔案: ${fullPath}`);
    line(branchState);
    line(`檔案資訊: bytes=${stat.size} mtime=${formatFileTime(stat.mtimeMs)} raw=${rawCount} normalized=${exclusions.length} path=${path.relative(workspace, fullPath) || fullPath}`);
  } catch (e) {
    warn(`讀取排除問題失敗: ${e.message}，視為空: ${fullPath}`);
    exclusions = [];
    rawCount = 0;
  }

  const summary = buildExclusionContext(exclusions);
  // The summary of an empty list may not carry groupCount; report 0 rather than "undefined".
  ok(`讀取排除問題: raw=${rawCount} normalized=${exclusions.length} groups=${summary.groupCount ?? 0} 筆`);
  return exclusions;
}

/**
 * Apply exclusion rules and drop the findings they match.
 *
 * Matching rules:
 * - Location is compared by file path only (the part before the first `:`); line numbers are ignored.
 * - When an exclusion specifies a path and/or a role, those decide the match and its text is not consulted.
 * - An exclusion with neither path nor role matches by text: both texts are NFKC-normalised,
 *   lowercased and stripped of punctuation and whitespace, and a match is a substring
 *   relation in either direction. A finding with no text matches any text-only exclusion.
 * - An exclusion with no path, no role and no text is ignored.
 *
 * @param {object[]} findings - Findings to filter.
 * @param {object[]} exclusions - Raw or normalised exclusion entries.
 * @returns {object[]} Findings not matched by any exclusion (the input itself when there are no exclusions).
 */
export function applyExclusions(findings, exclusions) {
  if (!Array.isArray(exclusions) || exclusions.length === 0) return findings;

  const pathOf = location => String(location).split(':')[0];
  // Both sides are compared in the same squashed form, so a stored `textKey`
  // (case-preserved, no separators) and free-form finding text line up.
  const squash = value => normalizeText(value).replace(/ /g, '');

  // Resolve each exclusion once up front instead of once per finding.
  const rules = exclusions
    .map(ex => ({
      path: ex?.filePath || (ex?.location ? pathOf(ex.location) : null),
      role: ex?.role || null,
      text: squash(ex?.textKey || ex?.text || ex?.suggestion || ex?.title || ''),
    }))
    // A rule with no criteria at all would otherwise match every finding.
    .filter(rule => rule.path || rule.role || rule.text);

  const before = findings.length;
  const filtered = findings.filter(f => {
    const findingPath = pathOf(f.location);
    const findingText = squash(f.suggestion || f.title || '');
    const excluded = rules.some(rule => {
      if (rule.path && rule.path !== findingPath) return false;
      if (rule.role && rule.role !== f.role) return false;
      // Path/role rules exclude regardless of text.
      if (rule.path || rule.role) return true;
      return !findingText || findingText.includes(rule.text) || rule.text.includes(findingText);
    });
    return !excluded;
  });
  ok(`排除過濾: ${before} -> ${filtered.length} 筆（排除 ${before - filtered.length} 筆）`);
  return filtered;
}

/**
 * Ask the LLM which findings are false positives or not applicable, and keep the rest.
 *
 * Known exclusions are summarised into the system prompt as a hint. Each
 * returned item is mapped back to the original finding to restore its full
 * fields. If the call fails, or the model returns a non-array or an empty
 * array, the original findings are returned unchanged via `fallback`.
 *
 * @param {object[]} findings - Findings to filter.
 * @param {object[]} [exclusions=[]] - Known false positives used as prompt context.
 * @param {Function} [chatFn=chatJSON] - `(systemPrompt, userContent) => Promise<any>`; injectable for testing.
 * @returns {Promise<object[]>} Findings to keep, or the input on failure.
 */
export async function filterFalsePositivesWithAI(findings, exclusions = [], chatFn = chatJSON) {
  if (findings.length === 0) return findings;

  const exclusionContext = buildExclusionContext(exclusions);
  const exclusionHint = exclusionContext.prompt
    ? `\n${exclusionContext.prompt}\n規則：若 finding 與上述任何一類的路徑、角色或描述高度相似，優先視為誤報或不適用。`
    : '';

  const systemPrompt = `判斷以下程式碼審查問題是否為誤報或不適用（如已正確使用 secrets、CI/CD 必要權限等），移除後只回傳需保留的 JSON 陣列。${exclusionHint}`;

  try {
    const result = await chatFn(systemPrompt, JSON.stringify(toAIPayload(findings)));
    if (!Array.isArray(result) || result.length === 0) {
      throw new Error('AI 回傳空陣列或非陣列');
    }
    ok(`AI 誤報過濾: ${findings.length} -> ${result.length} 筆`);

    const looseKey = f => `${f.location}|${String(f.suggestion).slice(0, 50)}`;
    const exactKey = f => `${f.role}|${f.level}|${looseKey(f)}`;
    const byLooseKey = new Map(findings.map(f => [looseKey(f), f]));
    const byExactKey = new Map(findings.map(f => [exactKey(f), f]));

    // Prefer a match that also agrees on role and level, so findings from different roles that
    // share location + suggestion are not swapped for one another; fall back to the loose key,
    // and finally pass the model's item through unchanged.
    return result.map(r => byExactKey.get(exactKey(r)) ?? byLooseKey.get(looseKey(r)) ?? r);
  } catch (e) {
    return fallback('AI 誤報過濾', findings, e);
  }
}