Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5876154dbb | |||
| 0e0cd252b0 | |||
| fcc8d59f7a | |||
| a92b6440ff | |||
| 8d8ace636e | |||
| fdeceee52f | |||
| fade942267 | |||
| 4834396652 | |||
| 0108a05886 |
@@ -208,5 +208,20 @@
|
||||
"role": "Zara",
|
||||
"location": "app/main.js",
|
||||
"suggestion": "deduplicateWithAI 和 filterFalsePositivesWithAI 為循序依賴流程(去重後才能過濾),無法平行化"
|
||||
},
|
||||
{
|
||||
"role": "Leo",
|
||||
"location": "app/comments.js",
|
||||
"suggestion": "buildTable 函式已在 comments.js 第 13 行定義,非未定義或未匯入,不會導致執行時錯誤"
|
||||
},
|
||||
{
|
||||
"role": "Maya",
|
||||
"location": "app/gitea.js",
|
||||
"suggestion": "filterDiff 的單元測試已在 gitea.test.js 補齊,涵蓋過濾 .gitea/、不誤過濾其他路徑、全部排除、空 diff 四種情境"
|
||||
},
|
||||
{
|
||||
"role": "Leo",
|
||||
"location": "TODO.md",
|
||||
"suggestion": "TODO.md 的階段編號僅供內部開發追蹤,無外部文件引用,階段編號調整不影響任何外部一致性"
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1,37 +1,23 @@
|
||||
[
|
||||
{
|
||||
"level": "warning",
|
||||
"role": "Aria",
|
||||
"location": "app/main.js:60",
|
||||
"suggestion": "已移除的註解 `// 載入舊 findings,用於 AI 誤報過濾參考` 提供了該程式碼區塊的上下文資訊。建議保留此類註解或以 JSDoc 形式補充,以提升程式碼可讀性與維護性。",
|
||||
"is_new": true
|
||||
},
|
||||
{
|
||||
"level": "warning",
|
||||
"role": "Aria",
|
||||
"location": "app/main.js:64",
|
||||
"suggestion": "已移除的註解 `// Clone repo 以讀取舊 findings 與排除清單` 說明了呼叫 `cloneRepo` 的目的。建議保留此類註解或以 JSDoc 形式補充,以提升程式碼可讀性與維護性。",
|
||||
"is_new": true
|
||||
},
|
||||
{
|
||||
"level": "warning",
|
||||
"role": "Maya",
|
||||
"location": "app/gitea.js:14",
|
||||
"suggestion": "更新 `filterDiff` 的測試。過濾邏輯從正則表達式匹配改為 `startsWith`,這是一個功能性變更。需要新增或修改測試案例,以確保新的 `startsWith` 邏輯能正確過濾或保留 diff 區塊,特別是針對邊界條件和不同前綴的匹配情況。",
|
||||
"level": "info",
|
||||
"role": "Rex",
|
||||
"location": "app/gitea.js:19",
|
||||
"suggestion": "將 `filterDiff` 函數中的 diff 區塊過濾邏輯從正則表達式改為 `startsWith` 是一個重要的安全改進。這可以有效防止潛在的正則表達式注入攻擊,即使 `excludePrefixes` 參數未來可能受到外部控制,也能確保過濾邏輯的安全性。",
|
||||
"is_new": true
|
||||
},
|
||||
{
|
||||
"level": "info",
|
||||
"role": "Rex",
|
||||
"location": "action.yaml",
|
||||
"suggestion": "此 Action 需要 `contents: write`、`pull-requests: write` 和 `issues: write` 權限。這些權限對於 Action 的正常運作是必要的(例如寫入 findings.json、發布評論),但屬於較廣泛的權限。建議在文件或使用說明中明確指出這些權限的需求及其潛在影響,確保使用者了解並接受。",
|
||||
"is_new": false
|
||||
"location": "app/main.js:46",
|
||||
"suggestion": "在將 Git Diff 內容傳遞給 AI 進行分析之前,明確呼叫 `filterDiff` 函數以排除 `.gitea/` 等敏感路徑,是一個良好的安全實踐。這有助於避免 AI 分析到不必要的或包含敏感配置的非業務程式碼,降低潛在的資訊洩漏風險。",
|
||||
"is_new": true
|
||||
},
|
||||
{
|
||||
"level": "info",
|
||||
"role": "Leo",
|
||||
"location": "app/main.js:16",
|
||||
"suggestion": "在 `main` 函式中,移除了多個高層次的註解,例如 `// 偵測 LLM`、`// 載入角色` 等。雖然這些註解描述了接下來的程式碼區塊,但對於理解整個 pipeline 的執行流程和各步驟的目標,它們提供了有用的指引。建議恢復這些高層次註解,以提升程式碼的整體可讀性和維護性,特別是對於新加入的開發者。",
|
||||
"role": "Rex",
|
||||
"location": "app/main.js:98",
|
||||
"suggestion": "新增對 `findings.json` 和 `exclusions.json` 檔案進行 JSON 格式驗證的步驟,並在格式錯誤時嘗試重置和備份,這是一個重要的健壯性與安全措施。它能防止因檔案損壞或惡意修改導致的服務中斷或行為異常,確保系統的穩定性和資料的完整性。",
|
||||
"is_new": true
|
||||
}
|
||||
]
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
6. API Key 支援逗號分隔傳入多個,隨機順序各嘗試一次,全部失敗則 exit 1
|
||||
7. 讀取 Git Diff 時排除 `.gitea/` 資料夾內的所有檔案,避免 AI 分析 workflow 設定等非業務程式碼
|
||||
8. 階段五完成後驗證 `findings.json` 與 `exclusions.json` 是否為合法 JSON 格式,格式錯誤時先嘗試重置為空陣列並備份原檔,修正失敗才 exit 1
|
||||
9. 傳給 AI 的 findings 只保留必要欄位(level、role、location、suggestion),排除 `is_new` 等內部欄位;system prompt 精簡為指令核心;exclusions hint 只傳 location 與 suggestion,減少 token 用量
|
||||
|
||||
# 使用說明
|
||||
|
||||
|
||||
@@ -49,3 +49,8 @@
|
||||
- 目標:所有平台的 API Key 支援逗號分隔傳入多個,隨機順序各嘗試一次,單一 Key 失敗時自動換下一個,全部失敗則 exit 1。
|
||||
- 驗收:log 中能看到「key[N/M] 失敗」等訊息,換 key 後繼續執行;傳入單一 Key 時行為與原本相同;全部 Key 失敗時 log「所有 API Key 均失敗,終止流程」且 workflow 狀態為失敗。
|
||||
- 未驗收
|
||||
|
||||
## 階段十一:壓縮 AI 傳入內容減少 token 用量
|
||||
- 目標:傳給 AI 的 findings 只保留必要欄位(level、role、location、suggestion);system prompt 精簡為指令核心;exclusions hint 只傳 location 與 suggestion;AI 回傳後補回原始完整欄位(含 is_new)。
|
||||
- 驗收:AI 呼叫的 payload 不含 is_new 等內部欄位,去重與誤報過濾後的 findings 仍保有完整欄位供後續流程使用。
|
||||
- 未驗收
|
||||
|
||||
+32
-38
@@ -18,25 +18,31 @@ export async function analyzeWithRole(role, diff) {
|
||||
}
|
||||
|
||||
/**
|
||||
* 讀取舊 findings(從 workspace 的 FINDINGS_PATH)
|
||||
* 讀取 JSON 陣列檔案,失敗或不存在時回傳空陣列
|
||||
*/
|
||||
export function loadOldFindings(workspace) {
|
||||
const fullPath = path.join(workspace, FINDINGS_PATH);
|
||||
function readJSONArray(fullPath, label) {
|
||||
if (!fs.existsSync(fullPath)) {
|
||||
console.log(' 舊 findings 檔案不存在,視為空');
|
||||
console.log(` ${label}檔案不存在,視為空`);
|
||||
return [];
|
||||
}
|
||||
try {
|
||||
const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
|
||||
const old = (Array.isArray(data) ? data : []).map(f => ({ ...f, is_new: false }));
|
||||
console.log(` 讀取舊 findings: ${old.length} 筆`);
|
||||
return old;
|
||||
return Array.isArray(data) ? data : [];
|
||||
} catch (e) {
|
||||
console.log(` ⚠️ 讀取舊 findings 失敗: ${e.message},視為空`);
|
||||
console.log(` ⚠️ 讀取${label}失敗: ${e.message},視為空`);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 讀取舊 findings(從 workspace 的 FINDINGS_PATH)
|
||||
*/
|
||||
export function loadOldFindings(workspace) {
|
||||
const old = readJSONArray(path.join(workspace, FINDINGS_PATH), '舊 findings ').map(f => ({ ...f, is_new: false }));
|
||||
console.log(` 讀取舊 findings: ${old.length} 筆`);
|
||||
return old;
|
||||
}
|
||||
|
||||
/**
|
||||
* 合併新舊 findings,以 (role + location + suggestion前50字) 為 key 去除重複
|
||||
*/
|
||||
@@ -70,22 +76,26 @@ function fallback(label, findings, e) {
|
||||
return findings;
|
||||
}
|
||||
|
||||
/** 只保留 AI 需要的欄位,減少 token 用量 */
|
||||
function toAIPayload(findings) {
|
||||
return findings.map(({ level, role, location, suggestion }) => ({ level, role, location, suggestion }));
|
||||
}
|
||||
|
||||
/**
|
||||
* 呼叫 LLM 進行語意去重,失敗時降級回傳原始 findings
|
||||
*/
|
||||
export async function deduplicateWithAI(findings) {
|
||||
if (findings.length === 0) return findings;
|
||||
|
||||
const systemPrompt = `你是一位程式碼審查問題去重專家。
|
||||
給你一份問題清單(JSON 陣列),請移除語意重複的問題(即使描述文字不同,但指的是同一個問題)。
|
||||
保留等級較高的版本,優先保留 critical > warning > info。
|
||||
只回傳去重後的 JSON 陣列,不要有其他文字。`;
|
||||
const systemPrompt = `移除語意重複的程式碼審查問題(JSON 陣列)。保留等級較高者(critical > warning > info)。只回傳去重後的 JSON 陣列。`;
|
||||
|
||||
try {
|
||||
const result = await chatJSON(systemPrompt, `以下是問題清單,請去除語意重複的項目:\n\n${JSON.stringify(findings, null, 2)}`);
|
||||
const result = await chatJSON(systemPrompt, JSON.stringify(toAIPayload(findings)));
|
||||
if (Array.isArray(result) && result.length > 0) {
|
||||
console.log(` AI 去重: ${findings.length} -> ${result.length} 筆`);
|
||||
return result;
|
||||
// 以 location+suggestion 為 key,將原始 findings 的完整欄位(含 is_new)補回
|
||||
const origMap = new Map(findings.map(f => [`${f.location}|${String(f.suggestion).slice(0, 50)}`, f]));
|
||||
return result.map(r => origMap.get(`${r.location}|${String(r.suggestion).slice(0, 50)}`) ?? r);
|
||||
}
|
||||
throw new Error('AI 回傳空陣列');
|
||||
} catch (e) {
|
||||
@@ -97,20 +107,9 @@ export async function deduplicateWithAI(findings) {
|
||||
* 讀取排除問題檔案(從 workspace 的 EXCLUSIONS_PATH)
|
||||
*/
|
||||
export function loadExclusions(workspace) {
|
||||
const fullPath = path.join(workspace, EXCLUSIONS_PATH);
|
||||
if (!fs.existsSync(fullPath)) {
|
||||
console.log(' 排除問題檔案不存在,跳過過濾');
|
||||
return [];
|
||||
}
|
||||
try {
|
||||
const data = JSON.parse(fs.readFileSync(fullPath, 'utf8'));
|
||||
const exclusions = Array.isArray(data) ? data : [];
|
||||
console.log(` 讀取排除問題: ${exclusions.length} 筆`);
|
||||
return exclusions;
|
||||
} catch (e) {
|
||||
console.log(` ⚠️ 讀取排除問題失敗: ${e.message},跳過過濾`);
|
||||
return [];
|
||||
}
|
||||
const exclusions = readJSONArray(path.join(workspace, EXCLUSIONS_PATH), '排除問題');
|
||||
console.log(` 讀取排除問題: ${exclusions.length} 筆`);
|
||||
return exclusions;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -136,22 +135,17 @@ export async function filterFalsePositivesWithAI(findings, exclusions = []) {
|
||||
if (findings.length === 0) return findings;
|
||||
|
||||
const exclusionHint = exclusions.length > 0
|
||||
? `\n\n以下是已知的誤報或不需處理的問題清單(供參考,相同檔案路徑且語意相近的問題應一併排除):\n${JSON.stringify(exclusions, null, 2)}`
|
||||
? `\n已知誤報(相同路徑且語意相近者一併排除):\n${JSON.stringify(exclusions.map(({ location, suggestion }) => ({ location, suggestion })))}`
|
||||
: '';
|
||||
|
||||
const systemPrompt = `你是一位資深程式碼審查專家,負責判斷審查問題是否為誤報或不需處理。
|
||||
給你一份問題清單(JSON 陣列),每筆包含 level、role、location、suggestion。
|
||||
請移除以下類型的問題:
|
||||
1. 誤報:問題描述與實際程式碼不符(例如:程式碼已正確使用環境變數或 secrets,卻被標記為硬編碼敏感資料)
|
||||
2. 不適用:問題在此專案情境下不需處理(例如:CI/CD action 本來就需要透過環境變數傳遞 token)
|
||||
3. 與已知誤報清單語意相近的問題(檔案路徑相同且建議內容相似)
|
||||
只回傳需要保留的問題 JSON 陣列,不要有其他文字。${exclusionHint}`;
|
||||
const systemPrompt = `判斷以下程式碼審查問題是否為誤報或不適用(如已正確使用 secrets、CI/CD 必要權限等),移除後只回傳需保留的 JSON 陣列。${exclusionHint}`;
|
||||
|
||||
try {
|
||||
const result = await chatJSON(systemPrompt, `請判斷以下問題清單,移除誤報或不需處理的問題:\n\n${JSON.stringify(findings, null, 2)}`);
|
||||
const result = await chatJSON(systemPrompt, JSON.stringify(toAIPayload(findings)));
|
||||
if (Array.isArray(result) && result.length > 0) {
|
||||
console.log(` AI 誤報過濾: ${findings.length} -> ${result.length} 筆`);
|
||||
return result;
|
||||
const origMap = new Map(findings.map(f => [`${f.location}|${String(f.suggestion).slice(0, 50)}`, f]));
|
||||
return result.map(r => origMap.get(`${r.location}|${String(r.suggestion).slice(0, 50)}`) ?? r);
|
||||
}
|
||||
throw new Error('AI 回傳空陣列或非陣列');
|
||||
} catch (e) {
|
||||
|
||||
+2
-2
@@ -3,6 +3,8 @@ import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { GITEA_SERVER_URL, GITEA_REPOSITORY, GITEA_TOKEN, PR_HEAD_BRANCH, FINDINGS_PATH } from './config.js';
|
||||
|
||||
const remoteUrl = `${GITEA_SERVER_URL.replace(/\/$/, '')}/${GITEA_REPOSITORY}.git`;
|
||||
|
||||
function makeRunner(spawn) {
|
||||
return function run(args, cwd, env) {
|
||||
const opts = { cwd, encoding: 'utf8' };
|
||||
@@ -30,7 +32,6 @@ function withAskpass(workspace, fn) {
|
||||
*/
|
||||
export function cloneRepo(workspace, _spawnSync = spawnSync) {
|
||||
const run = makeRunner(_spawnSync);
|
||||
const remoteUrl = `${GITEA_SERVER_URL.replace(/\/$/, '')}/${GITEA_REPOSITORY}.git`;
|
||||
const repoDir = path.join(workspace, 'repo');
|
||||
|
||||
return withAskpass(workspace, credEnv => {
|
||||
@@ -48,7 +49,6 @@ export function cloneRepo(workspace, _spawnSync = spawnSync) {
|
||||
|
||||
export async function commitAndPush(workspace, _spawnSync = spawnSync) {
|
||||
const run = makeRunner(_spawnSync);
|
||||
const remoteUrl = `${GITEA_SERVER_URL.replace(/\/$/, '')}/${GITEA_REPOSITORY}.git`;
|
||||
|
||||
try {
|
||||
const repoDir = cloneRepo(workspace, _spawnSync);
|
||||
|
||||
@@ -6,6 +6,10 @@ const httpsAgent = GITEA_SKIP_TLS_VERIFY ? new https.Agent({ rejectUnauthorized:
|
||||
const headers = () => ({ Authorization: `token ${GITEA_TOKEN}`, 'Content-Type': 'application/json' });
|
||||
const api = (path) => `${GITEA_SERVER_URL.replace(/\/$/, '')}/api/v1${path}`;
|
||||
|
||||
/**
|
||||
* 取得 PR 的原始 Git Diff 內容。
|
||||
* 注意:回傳值未經路徑過濾,呼叫端須使用 filterDiff 排除敏感路徑(如 .gitea/)後再傳給 AI。
|
||||
*/
|
||||
export async function getPRDiff() {
|
||||
const resp = await axios.get(api(`/repos/${GITEA_REPOSITORY}/pulls/${PR_NUMBER}.diff`), { headers: headers(), timeout: 60000, httpsAgent });
|
||||
return resp.data;
|
||||
|
||||
+30
-5
@@ -2,13 +2,10 @@ import { describe, it, afterEach, mock } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import axios from 'axios';
|
||||
|
||||
// gitea.js reads env vars at module load time (ESM cache), so we test
|
||||
// the actual values baked in at import time and verify behavior via axios mocks.
|
||||
|
||||
afterEach(() => mock.restoreAll());
|
||||
|
||||
describe('gitea', async () => {
|
||||
const { getPRDiff, postComment } = await import('./gitea.js');
|
||||
const { getPRDiff, filterDiff, postComment } = await import('./gitea.js');
|
||||
|
||||
it('getPRDiff calls Gitea diff API with Authorization header', async () => {
|
||||
let capturedUrl, capturedOpts;
|
||||
@@ -48,7 +45,6 @@ describe('gitea', async () => {
|
||||
return { data: '' };
|
||||
});
|
||||
await getPRDiff();
|
||||
// httpsAgent is undefined when GITEA_SKIP_TLS_VERIFY !== 'true'
|
||||
assert.equal(capturedOpts.httpsAgent, undefined);
|
||||
});
|
||||
|
||||
@@ -62,3 +58,32 @@ describe('gitea', async () => {
|
||||
await assert.rejects(() => postComment('test'), /api error/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('filterDiff', async () => {
|
||||
const { filterDiff } = await import('./gitea.js');
|
||||
|
||||
const block = (file) => `diff --git a/${file} b/${file}\n--- a/${file}\n+++ b/${file}\n@@ -1 +1 @@\n-old\n+new\n`;
|
||||
|
||||
it('filters out .gitea/ blocks', () => {
|
||||
const diff = block('.gitea/workflows/review.yaml') + block('src/index.js');
|
||||
const result = filterDiff(diff, ['.gitea/']);
|
||||
assert.ok(!result.includes('.gitea/'));
|
||||
assert.ok(result.includes('src/index.js'));
|
||||
});
|
||||
|
||||
it('does not filter non-.gitea/ blocks', () => {
|
||||
const diff = block('src/index.js') + block('README.md');
|
||||
const result = filterDiff(diff, ['.gitea/']);
|
||||
assert.equal(result, diff);
|
||||
});
|
||||
|
||||
it('returns empty string when all blocks are excluded', () => {
|
||||
const diff = block('.gitea/workflows/review.yaml') + block('.gitea/ai-review/findings.json');
|
||||
const result = filterDiff(diff, ['.gitea/']);
|
||||
assert.equal(result, '');
|
||||
});
|
||||
|
||||
it('returns empty string for empty diff', () => {
|
||||
assert.equal(filterDiff('', ['.gitea/']), '');
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user