Files
gitlore/src/core/note_parser.rs
Taylor Eernisse 7e0e6a91f2 refactor: extract unit tests into separate _tests.rs files
Move inline #[cfg(test)] mod tests { ... } blocks from 22 source files
into dedicated _tests.rs companion files, wired via:

    #[cfg(test)]
    #[path = "module_tests.rs"]
    mod tests;

This keeps implementation-focused source files leaner and more scannable
while preserving full access to private items through `use super::*;`.

Modules extracted:
  core:      db, note_parser, payloads, project, references, sync_run,
             timeline_collect, timeline_expand, timeline_seed
  cli:       list (55 tests), who (75 tests)
  documents: extractor (43 tests), regenerator
  embedding: change_detector, chunking
  gitlab:    graphql (wiremock async tests), transformers/issue
  ingestion: dirty_tracker, discussions, issues, mr_diffs

Also adds conflicts_with("explain_score") to the --detail flag in the
who command to prevent mutually exclusive flags from being combined.

All 629 unit tests pass. No behavior changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 10:54:02 -05:00

239 lines
6.6 KiB
Rust

use std::sync::LazyLock;
use regex::Regex;
use rusqlite::Connection;
use tracing::debug;
use super::error::Result;
use super::time::now_ms;
/// A single cross-reference recognised in the body of a note.
///
/// Values are produced by [`parse_cross_refs`]; nothing here has been checked
/// against the database yet.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedCrossRef {
    /// Kind of relationship expressed by the note text, e.g. `"mentioned"`
    /// or `"closes"`.
    pub reference_type: String,
    /// Kind of entity being pointed at: `"issue"` for a `#` sigil,
    /// `"merge_request"` for a `!` sigil.
    pub target_entity_type: String,
    /// The number that followed the sigil in the note text.
    pub target_iid: i64,
    /// Project path written before the sigil, or `None` when the reference
    /// carried no project prefix.
    pub target_project_path: Option<String>,
}
/// Counters reported by [`extract_refs_from_system_notes`].
#[derive(Default, Debug)]
pub struct ExtractResult {
    /// Newly inserted reference rows whose target was resolved to an entity id.
    pub inserted: usize,
    /// Newly inserted reference rows whose target could not be resolved to an
    /// entity id (the row records the target IID instead).
    pub skipped_unresolvable: usize,
    /// System notes whose body matched none of the cross-reference patterns.
    pub parse_failures: usize,
}
/// Lazily compiled pattern for "mentioned in ..." references.
///
/// Matches the literal text `mentioned in ` followed by an optional project
/// path (two or more `/`-separated segments, captured as `project`), a sigil
/// (`#` or `!`, captured as `sigil`) and a run of digits (captured as `iid`).
///
/// NOTE(review): the optional project path or the sigil must come directly
/// after the literal prefix, so a body such as `mentioned in issue #5` does
/// not match — confirm this agrees with the note bodies actually stored.
static MENTIONED_RE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"mentioned in (?:(?P<project>[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P<sigil>[#!])(?P<iid>\d+)";
    Regex::new(PATTERN).expect("mentioned regex is valid")
});
/// Lazily compiled pattern for "closed by ..." references.
///
/// Matches the literal text `closed by ` followed by an optional project path
/// (two or more `/`-separated segments, captured as `project`), a sigil
/// (`#` or `!`, captured as `sigil`) and a run of digits (captured as `iid`).
static CLOSED_BY_RE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"closed by (?:(?P<project>[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P<sigil>[#!])(?P<iid>\d+)";
    Regex::new(PATTERN).expect("closed_by regex is valid")
});
/// Extracts every cross-reference found in `body`.
///
/// All `mentioned in ...` matches come first (tagged `"mentioned"`), followed
/// by all `closed by ...` matches (tagged `"closes"`); within each group the
/// matches keep their order of appearance in `body`. Matches that cannot be
/// converted (for example an IID that does not fit in an `i64`) are dropped.
/// Returns an empty vector when nothing matches.
pub fn parse_cross_refs(body: &str) -> Vec<ParsedCrossRef> {
    let mentioned = MENTIONED_RE
        .captures_iter(body)
        .filter_map(|caps| capture_to_cross_ref(&caps, "mentioned"));
    let closed_by = CLOSED_BY_RE
        .captures_iter(body)
        .filter_map(|caps| capture_to_cross_ref(&caps, "closes"));

    // `chain` drains the first iterator completely before touching the second,
    // which preserves the "mentioned, then closes" ordering.
    mentioned.chain(closed_by).collect()
}
/// Converts one regex match into a [`ParsedCrossRef`] tagged with
/// `reference_type`.
///
/// Reads the `sigil`, `iid` and (optional) `project` named groups from `caps`.
/// Returns `None` when the `sigil` or `iid` group is absent, when the sigil is
/// neither `#` nor `!`, or when the IID text does not parse as an `i64`.
fn capture_to_cross_ref(
    caps: &regex::Captures<'_>,
    reference_type: &str,
) -> Option<ParsedCrossRef> {
    let target_entity_type = match caps.name("sigil")?.as_str() {
        "#" => "issue",
        "!" => "merge_request",
        _ => return None,
    };
    let target_iid = caps.name("iid")?.as_str().parse::<i64>().ok()?;
    let target_project_path = caps.name("project").map(|m| m.as_str().to_owned());

    Some(ParsedCrossRef {
        reference_type: reference_type.to_owned(),
        target_entity_type: target_entity_type.to_owned(),
        target_iid,
        target_project_path,
    })
}
/// One row loaded by the system-note query in
/// [`extract_refs_from_system_notes`].
struct SystemNote {
    /// `notes.id` of the note.
    note_id: i64,
    /// `notes.body`; the query only selects rows where this is not NULL.
    body: String,
    /// `discussions.noteable_type` of the discussion the note belongs to.
    noteable_type: String,
    /// `COALESCE(discussions.issue_id, discussions.merge_request_id)`.
    entity_id: i64,
}
/// Scans the system notes of one project and records the cross-references
/// they contain in `entity_references`.
///
/// Every system note with a non-NULL body belonging to `project_id` is run
/// through [`parse_cross_refs`]. Each reference found is written with
/// `INSERT OR IGNORE`, using `'note_parse'` as the source method:
///
/// * when the target can be resolved, the row stores `target_entity_id` and
///   leaves `target_entity_iid` NULL;
/// * otherwise the row stores `target_entity_iid` and leaves
///   `target_entity_id` NULL.
///
/// Only inserts that actually changed a row are counted. Notes whose body
/// matches no pattern are counted in `parse_failures`.
///
/// # Errors
///
/// Propagates any error from preparing or running the SELECT, from reading a
/// row, or from executing the INSERT.
pub fn extract_refs_from_system_notes(conn: &Connection, project_id: i64) -> Result<ExtractResult> {
    let mut totals = ExtractResult::default();

    // SQL text is kept verbatim, hence the flush-left continuation lines.
    let mut select_stmt = conn.prepare_cached(
        "SELECT n.id, n.body, d.noteable_type,
COALESCE(d.issue_id, d.merge_request_id) AS entity_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
WHERE n.is_system = 1
AND n.project_id = ?1
AND n.body IS NOT NULL",
    )?;

    // Materialise all rows before the write loop starts.
    let mapped_rows = select_stmt.query_map([project_id], |row| {
        Ok(SystemNote {
            note_id: row.get(0)?,
            body: row.get(1)?,
            noteable_type: row.get(2)?,
            entity_id: row.get(3)?,
        })
    })?;
    let system_notes: Vec<SystemNote> =
        mapped_rows.collect::<std::result::Result<Vec<_>, _>>()?;

    // Nothing to do: skip preparing the insert statement altogether.
    if system_notes.is_empty() {
        return Ok(totals);
    }

    let mut insert_stmt = conn.prepare_cached(
        "INSERT OR IGNORE INTO entity_references
(project_id, source_entity_type, source_entity_id,
target_entity_type, target_entity_id,
target_project_path, target_entity_iid,
reference_type, source_method, created_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'note_parse', ?9)",
    )?;

    // One timestamp shared by every row written in this run.
    let created_at = now_ms();

    for note in &system_notes {
        let parsed_refs = parse_cross_refs(&note.body);
        if parsed_refs.is_empty() {
            debug!(
                note_id = note.note_id,
                body = %note.body,
                "System note did not match any cross-reference pattern"
            );
            totals.parse_failures += 1;
            continue;
        }

        let source_entity_type = noteable_type_to_entity_type(&note.noteable_type);

        for xref in &parsed_refs {
            // No project prefix means the target lives in the current project.
            let target_entity_id = match xref.target_project_path.as_deref() {
                None => {
                    resolve_entity_id(conn, project_id, &xref.target_entity_type, xref.target_iid)
                }
                Some(project_path) => resolve_cross_project_entity(
                    conn,
                    project_path,
                    &xref.target_entity_type,
                    xref.target_iid,
                ),
            };

            // The IID is stored only when no entity id could be resolved.
            let unresolved_iid = match target_entity_id {
                Some(_) => None,
                None => Some(xref.target_iid),
            };

            let rows_changed = insert_stmt.execute(rusqlite::params![
                project_id,
                source_entity_type,
                note.entity_id,
                xref.target_entity_type,
                target_entity_id,
                xref.target_project_path,
                unresolved_iid,
                xref.reference_type,
                created_at,
            ])?;

            // `INSERT OR IGNORE` reports zero rows when the insert was ignored;
            // those are not counted again.
            if rows_changed == 0 {
                continue;
            }
            match target_entity_id {
                Some(_) => totals.inserted += 1,
                None => totals.skipped_unresolvable += 1,
            }
        }
    }

    let wrote_any_row = totals.inserted > 0 || totals.skipped_unresolvable > 0;
    if wrote_any_row {
        debug!(
            inserted = totals.inserted,
            unresolvable = totals.skipped_unresolvable,
            parse_failures = totals.parse_failures,
            "System note cross-reference extraction complete"
        );
    }

    Ok(totals)
}
/// Maps a `noteable_type` value to the entity-type string stored in
/// `entity_references`.
///
/// `"Issue"` becomes `"issue"` and `"MergeRequest"` becomes
/// `"merge_request"`. Any other value is logged at debug level and treated as
/// `"issue"`.
fn noteable_type_to_entity_type(noteable_type: &str) -> &str {
    if noteable_type == "MergeRequest" {
        return "merge_request";
    }
    if noteable_type != "Issue" {
        // Unrecognised value: report it, then fall through to the default.
        debug!(noteable_type = %noteable_type, "Unknown noteable_type, defaulting to issue");
    }
    "issue"
}
fn resolve_entity_id(
conn: &Connection,
project_id: i64,
entity_type: &str,
iid: i64,
) -> Option<i64> {
let (table, id_col) = match entity_type {
"issue" => ("issues", "id"),
"merge_request" => ("merge_requests", "id"),
_ => return None,
};
let sql = format!("SELECT {id_col} FROM {table} WHERE project_id = ?1 AND iid = ?2");
conn.query_row(&sql, rusqlite::params![project_id, iid], |row| row.get(0))
.ok()
}
fn resolve_cross_project_entity(
conn: &Connection,
project_path: &str,
entity_type: &str,
iid: i64,
) -> Option<i64> {
let project_id: i64 = conn
.query_row(
"SELECT id FROM projects WHERE path_with_namespace = ?1",
[project_path],
|row| row.get(0),
)
.ok()?;
resolve_entity_id(conn, project_id, entity_type, iid)
}
#[cfg(test)]
#[path = "note_parser_tests.rs"]
mod tests;