diff --git a/src/core/note_parser.rs b/src/core/note_parser.rs index 0c57dbf..afafd38 100644 --- a/src/core/note_parser.rs +++ b/src/core/note_parser.rs @@ -22,20 +22,34 @@ pub struct ExtractResult { pub parse_failures: usize, } +// GitLab system notes include the entity type word: "mentioned in issue #5" +// or "mentioned in merge request !730". The word is mandatory in real data, +// but we also keep the old bare-sigil form as a fallback (no data uses it today, +// but other GitLab instances might differ). static MENTIONED_RE: LazyLock = LazyLock::new(|| { Regex::new( - r"mentioned in (?:(?P[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P[#!])(?P\d+)", + r"mentioned in (?:issue |merge request )?(?:(?P[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P[#!])(?P\d+)", ) .expect("mentioned regex is valid") }); static CLOSED_BY_RE: LazyLock = LazyLock::new(|| { Regex::new( - r"closed by (?:(?P[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P[#!])(?P\d+)", + r"closed by (?:issue |merge request )?(?:(?P[\w][\w.\-]*(?:/[\w][\w.\-]*)+))?(?P[#!])(?P\d+)", ) .expect("closed_by regex is valid") }); +/// Matches full GitLab URLs like: +/// `https://gitlab.example.com/group/project/-/issues/123` +/// `https://gitlab.example.com/group/sub/project/-/merge_requests/456` +static GITLAB_URL_RE: LazyLock = LazyLock::new(|| { + Regex::new( + r"https?://[^\s/]+/(?P[^\s]+?)/-/(?Pissues|merge_requests)/(?P\d+)", + ) + .expect("gitlab url regex is valid") +}); + pub fn parse_cross_refs(body: &str) -> Vec { let mut refs = Vec::new(); @@ -54,6 +68,47 @@ pub fn parse_cross_refs(body: &str) -> Vec { refs } +/// Extract cross-references from GitLab URLs in free-text bodies (descriptions, user notes). +pub fn parse_url_refs(body: &str) -> Vec { + let mut refs = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for caps in GITLAB_URL_RE.captures_iter(body) { + let Some(entity_type_raw) = caps.name("entity_type").map(|m| m.as_str()) else { + continue; + }; + let Some(iid_str) = caps.name("iid").map(|m| m.as_str()) else { + continue; + }; + let Some(project) = caps.name("project").map(|m| m.as_str()) else { + continue; + }; + let Ok(iid) = iid_str.parse::() else { + continue; + }; + + let target_entity_type = match entity_type_raw { + "issues" => "issue", + "merge_requests" => "merge_request", + _ => continue, + }; + + let key = (target_entity_type, project.to_owned(), iid); + if !seen.insert(key) { + continue; // deduplicate within same body + } + + refs.push(ParsedCrossRef { + reference_type: "mentioned".to_owned(), + target_entity_type: target_entity_type.to_owned(), + target_iid: iid, + target_project_path: Some(project.to_owned()), + }); + } + + refs +} + fn capture_to_cross_ref( caps: ®ex::Captures<'_>, reference_type: &str, @@ -233,6 +288,189 @@ fn resolve_cross_project_entity( resolve_entity_id(conn, project_id, entity_type, iid) } +/// Extract cross-references from issue and MR descriptions (GitLab URLs only). +pub fn extract_refs_from_descriptions(conn: &Connection, project_id: i64) -> Result { + let mut result = ExtractResult::default(); + + let mut insert_stmt = conn.prepare_cached( + "INSERT OR IGNORE INTO entity_references + (project_id, source_entity_type, source_entity_id, + target_entity_type, target_entity_id, + target_project_path, target_entity_iid, + reference_type, source_method, created_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'description_parse', ?9)", + )?; + + let now = now_ms(); + + // Issues with descriptions + let mut issue_stmt = conn.prepare_cached( + "SELECT id, iid, description FROM issues + WHERE project_id = ?1 AND description IS NOT NULL AND description != ''", + )?; + let issues: Vec<(i64, i64, String)> = issue_stmt + .query_map([project_id], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?)) + })? + .collect::, _>>()?; + + for (entity_id, _iid, description) in &issues { + insert_url_refs( + conn, + &mut insert_stmt, + &mut result, + project_id, + "issue", + *entity_id, + description, + now, + )?; + } + + // Merge requests with descriptions + let mut mr_stmt = conn.prepare_cached( + "SELECT id, iid, description FROM merge_requests + WHERE project_id = ?1 AND description IS NOT NULL AND description != ''", + )?; + let mrs: Vec<(i64, i64, String)> = mr_stmt + .query_map([project_id], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?)) + })? + .collect::, _>>()?; + + for (entity_id, _iid, description) in &mrs { + insert_url_refs( + conn, + &mut insert_stmt, + &mut result, + project_id, + "merge_request", + *entity_id, + description, + now, + )?; + } + + if result.inserted > 0 || result.skipped_unresolvable > 0 { + debug!( + inserted = result.inserted, + unresolvable = result.skipped_unresolvable, + "Description cross-reference extraction complete" + ); + } + + Ok(result) +} + +/// Extract cross-references from user (non-system) notes (GitLab URLs only). +pub fn extract_refs_from_user_notes(conn: &Connection, project_id: i64) -> Result { + let mut result = ExtractResult::default(); + + let mut note_stmt = conn.prepare_cached( + "SELECT n.id, n.body, d.noteable_type, + COALESCE(d.issue_id, d.merge_request_id) AS entity_id + FROM notes n + JOIN discussions d ON n.discussion_id = d.id + WHERE n.is_system = 0 + AND n.project_id = ?1 + AND n.body IS NOT NULL", + )?; + + let notes: Vec<(i64, String, String, i64)> = note_stmt + .query_map([project_id], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) + })? + .collect::, _>>()?; + + if notes.is_empty() { + return Ok(result); + } + + let mut insert_stmt = conn.prepare_cached( + "INSERT OR IGNORE INTO entity_references + (project_id, source_entity_type, source_entity_id, + target_entity_type, target_entity_id, + target_project_path, target_entity_iid, + reference_type, source_method, created_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'note_parse', ?9)", + )?; + + let now = now_ms(); + + for (_, body, noteable_type, entity_id) in ¬es { + let source_entity_type = noteable_type_to_entity_type(noteable_type); + insert_url_refs( + conn, + &mut insert_stmt, + &mut result, + project_id, + source_entity_type, + *entity_id, + body, + now, + )?; + } + + if result.inserted > 0 || result.skipped_unresolvable > 0 { + debug!( + inserted = result.inserted, + unresolvable = result.skipped_unresolvable, + "User note cross-reference extraction complete" + ); + } + + Ok(result) +} + +/// Shared helper: parse URL refs from a body and insert into entity_references. +#[allow(clippy::too_many_arguments)] +fn insert_url_refs( + conn: &Connection, + insert_stmt: &mut rusqlite::CachedStatement<'_>, + result: &mut ExtractResult, + project_id: i64, + source_entity_type: &str, + source_entity_id: i64, + body: &str, + now: i64, +) -> Result<()> { + let url_refs = parse_url_refs(body); + + for xref in &url_refs { + let target_entity_id = if let Some(ref path) = xref.target_project_path { + resolve_cross_project_entity(conn, path, &xref.target_entity_type, xref.target_iid) + } else { + resolve_entity_id(conn, project_id, &xref.target_entity_type, xref.target_iid) + }; + + let rows_changed = insert_stmt.execute(rusqlite::params![ + project_id, + source_entity_type, + source_entity_id, + xref.target_entity_type, + target_entity_id, + xref.target_project_path, + if target_entity_id.is_none() { + Some(xref.target_iid) + } else { + None + }, + xref.reference_type, + now, + ])?; + + if rows_changed > 0 { + if target_entity_id.is_none() { + result.skipped_unresolvable += 1; + } else { + result.inserted += 1; + } + } + } + + Ok(()) +} + #[cfg(test)] #[path = "note_parser_tests.rs"] mod tests; diff --git a/src/core/note_parser_tests.rs b/src/core/note_parser_tests.rs index 0cba3b2..abea956 100644 --- a/src/core/note_parser_tests.rs +++ b/src/core/note_parser_tests.rs @@ -1,8 +1,10 @@ use super::*; +// --- parse_cross_refs: real GitLab system note format --- + #[test] fn test_parse_mentioned_in_mr() { - let refs = parse_cross_refs("mentioned in !567"); + let refs = parse_cross_refs("mentioned in merge request !567"); assert_eq!(refs.len(), 1); assert_eq!(refs[0].reference_type, "mentioned"); assert_eq!(refs[0].target_entity_type, "merge_request"); @@ -12,7 +14,7 @@ fn test_parse_mentioned_in_mr() { #[test] fn test_parse_mentioned_in_issue() { - let refs = parse_cross_refs("mentioned in #234"); + let refs = parse_cross_refs("mentioned in issue #234"); assert_eq!(refs.len(), 1); assert_eq!(refs[0].reference_type, "mentioned"); assert_eq!(refs[0].target_entity_type, "issue"); @@ -22,7 +24,7 @@ fn test_parse_mentioned_in_issue() { #[test] fn test_parse_mentioned_cross_project() { - let refs = parse_cross_refs("mentioned in group/repo!789"); + let refs = parse_cross_refs("mentioned in merge request group/repo!789"); assert_eq!(refs.len(), 1); assert_eq!(refs[0].reference_type, "mentioned"); assert_eq!(refs[0].target_entity_type, "merge_request"); @@ -32,7 +34,7 @@ fn test_parse_mentioned_cross_project() { #[test] fn test_parse_mentioned_cross_project_issue() { - let refs = parse_cross_refs("mentioned in group/repo#123"); + let refs = parse_cross_refs("mentioned in issue group/repo#123"); assert_eq!(refs.len(), 1); assert_eq!(refs[0].reference_type, "mentioned"); assert_eq!(refs[0].target_entity_type, "issue"); @@ -42,7 +44,7 @@ fn test_parse_mentioned_cross_project_issue() { #[test] fn test_parse_closed_by_mr() { - let refs = parse_cross_refs("closed by !567"); + let refs = parse_cross_refs("closed by merge request !567"); assert_eq!(refs.len(), 1); assert_eq!(refs[0].reference_type, "closes"); assert_eq!(refs[0].target_entity_type, "merge_request"); @@ -52,7 +54,7 @@ fn test_parse_closed_by_mr() { #[test] fn test_parse_closed_by_cross_project() { - let refs = parse_cross_refs("closed by group/repo!789"); + let refs = parse_cross_refs("closed by merge request group/repo!789"); assert_eq!(refs.len(), 1); assert_eq!(refs[0].reference_type, "closes"); assert_eq!(refs[0].target_entity_type, "merge_request"); @@ -62,7 +64,7 @@ fn test_parse_closed_by_cross_project() { #[test] fn test_parse_multiple_refs() { - let refs = parse_cross_refs("mentioned in !123 and mentioned in #456"); + let refs = parse_cross_refs("mentioned in merge request !123 and mentioned in issue #456"); assert_eq!(refs.len(), 2); assert_eq!(refs[0].target_entity_type, "merge_request"); assert_eq!(refs[0].target_iid, 123); @@ -84,7 +86,7 @@ fn test_parse_non_english_note() { #[test] fn test_parse_multi_level_group_path() { - let refs = parse_cross_refs("mentioned in top/sub/project#123"); + let refs = parse_cross_refs("mentioned in issue top/sub/project#123"); assert_eq!(refs.len(), 1); assert_eq!( refs[0].target_project_path.as_deref(), @@ -95,7 +97,7 @@ fn test_parse_multi_level_group_path() { #[test] fn test_parse_deeply_nested_group_path() { - let refs = parse_cross_refs("mentioned in a/b/c/d/e!42"); + let refs = parse_cross_refs("mentioned in merge request a/b/c/d/e!42"); assert_eq!(refs.len(), 1); assert_eq!(refs[0].target_project_path.as_deref(), Some("a/b/c/d/e")); assert_eq!(refs[0].target_iid, 42); @@ -103,7 +105,7 @@ fn test_parse_deeply_nested_group_path() { #[test] fn test_parse_hyphenated_project_path() { - let refs = parse_cross_refs("mentioned in my-group/my-project#99"); + let refs = parse_cross_refs("mentioned in issue my-group/my-project#99"); assert_eq!(refs.len(), 1); assert_eq!( refs[0].target_project_path.as_deref(), @@ -113,7 +115,7 @@ fn test_parse_hyphenated_project_path() { #[test] fn test_parse_dotted_project_path() { - let refs = parse_cross_refs("mentioned in visiostack.io/backend#123"); + let refs = parse_cross_refs("mentioned in issue visiostack.io/backend#123"); assert_eq!(refs.len(), 1); assert_eq!( refs[0].target_project_path.as_deref(), @@ -124,7 +126,7 @@ fn test_parse_dotted_project_path() { #[test] fn test_parse_dotted_nested_project_path() { - let refs = parse_cross_refs("closed by my.org/sub.group/my.project!42"); + let refs = parse_cross_refs("closed by merge request my.org/sub.group/my.project!42"); assert_eq!(refs.len(), 1); assert_eq!( refs[0].target_project_path.as_deref(), @@ -134,16 +136,27 @@ fn test_parse_dotted_nested_project_path() { assert_eq!(refs[0].target_iid, 42); } +// Bare-sigil fallback (no "issue"/"merge request" word) still works #[test] -fn test_parse_self_reference_is_valid() { +fn test_parse_bare_sigil_fallback() { let refs = parse_cross_refs("mentioned in #123"); assert_eq!(refs.len(), 1); assert_eq!(refs[0].target_iid, 123); + assert_eq!(refs[0].target_entity_type, "issue"); +} + +#[test] +fn test_parse_bare_sigil_closed_by() { + let refs = parse_cross_refs("closed by !567"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].reference_type, "closes"); + assert_eq!(refs[0].target_entity_type, "merge_request"); + assert_eq!(refs[0].target_iid, 567); } #[test] fn test_parse_mixed_mentioned_and_closed() { - let refs = parse_cross_refs("mentioned in !10 and closed by !20"); + let refs = parse_cross_refs("mentioned in merge request !10 and closed by merge request !20"); assert_eq!(refs.len(), 2); assert_eq!(refs[0].reference_type, "mentioned"); assert_eq!(refs[0].target_iid, 10); @@ -151,6 +164,113 @@ fn test_parse_mixed_mentioned_and_closed() { assert_eq!(refs[1].target_iid, 20); } +// --- parse_url_refs --- + +#[test] +fn test_url_ref_same_project_issue() { + let refs = parse_url_refs( + "See https://gitlab.visiostack.com/vs/typescript-code/-/issues/3537 for details", + ); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].target_entity_type, "issue"); + assert_eq!(refs[0].target_iid, 3537); + assert_eq!( + refs[0].target_project_path.as_deref(), + Some("vs/typescript-code") + ); + assert_eq!(refs[0].reference_type, "mentioned"); +} + +#[test] +fn test_url_ref_merge_request() { + let refs = + parse_url_refs("https://gitlab.visiostack.com/vs/typescript-code/-/merge_requests/3548"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].target_entity_type, "merge_request"); + assert_eq!(refs[0].target_iid, 3548); + assert_eq!( + refs[0].target_project_path.as_deref(), + Some("vs/typescript-code") + ); +} + +#[test] +fn test_url_ref_cross_project() { + let refs = parse_url_refs( + "Related: https://gitlab.visiostack.com/vs/python-code/-/merge_requests/5203", + ); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].target_entity_type, "merge_request"); + assert_eq!(refs[0].target_iid, 5203); + assert_eq!( + refs[0].target_project_path.as_deref(), + Some("vs/python-code") + ); +} + +#[test] +fn test_url_ref_with_anchor() { + let refs = + parse_url_refs("https://gitlab.visiostack.com/vs/typescript-code/-/issues/123#note_456"); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].target_entity_type, "issue"); + assert_eq!(refs[0].target_iid, 123); +} + +#[test] +fn test_url_ref_markdown_link() { + let refs = parse_url_refs( + "Check [this MR](https://gitlab.visiostack.com/vs/typescript-code/-/merge_requests/100) for context", + ); + assert_eq!(refs.len(), 1); + assert_eq!(refs[0].target_entity_type, "merge_request"); + assert_eq!(refs[0].target_iid, 100); +} + +#[test] +fn test_url_ref_multiple_urls() { + let body = + "See https://gitlab.com/a/b/-/issues/1 and https://gitlab.com/a/b/-/merge_requests/2"; + let refs = parse_url_refs(body); + assert_eq!(refs.len(), 2); + assert_eq!(refs[0].target_entity_type, "issue"); + assert_eq!(refs[0].target_iid, 1); + assert_eq!(refs[1].target_entity_type, "merge_request"); + assert_eq!(refs[1].target_iid, 2); +} + +#[test] +fn test_url_ref_deduplicates() { + let body = "See https://gitlab.com/a/b/-/issues/1 and again https://gitlab.com/a/b/-/issues/1"; + let refs = parse_url_refs(body); + assert_eq!( + refs.len(), + 1, + "Duplicate URLs in same body should be deduplicated" + ); +} + +#[test] +fn test_url_ref_non_gitlab_urls_ignored() { + let refs = parse_url_refs( + "Check https://google.com/search?q=test and https://github.com/org/repo/issues/1", + ); + assert!(refs.is_empty()); +} + +#[test] +fn test_url_ref_deeply_nested_project() { + let refs = parse_url_refs("https://gitlab.com/org/sub/deep/project/-/issues/42"); + assert_eq!(refs.len(), 1); + assert_eq!( + refs[0].target_project_path.as_deref(), + Some("org/sub/deep/project") + ); + assert_eq!(refs[0].target_iid, 42); +} + +// --- Integration tests: system notes (updated for real format) --- + fn setup_test_db() -> Connection { use crate::core::db::{create_connection, run_migrations}; @@ -204,27 +324,31 @@ fn seed_test_data(conn: &Connection) -> i64 { ) .unwrap(); + // System note: real GitLab format "mentioned in merge request !789" conn.execute( "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at) - VALUES (40, 4000, 30, 1, 1, 'mentioned in !789', ?1, ?1, ?1)", + VALUES (40, 4000, 30, 1, 1, 'mentioned in merge request !789', ?1, ?1, ?1)", [now], ) .unwrap(); + // System note: real GitLab format "mentioned in issue #456" conn.execute( "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at) - VALUES (41, 4001, 31, 1, 1, 'mentioned in #456', ?1, ?1, ?1)", + VALUES (41, 4001, 31, 1, 1, 'mentioned in issue #456', ?1, ?1, ?1)", [now], ) .unwrap(); + // User note (is_system=0) — should NOT be processed by system note extractor conn.execute( "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at) - VALUES (42, 4002, 30, 1, 0, 'mentioned in !999', ?1, ?1, ?1)", + VALUES (42, 4002, 30, 1, 0, 'mentioned in merge request !999', ?1, ?1, ?1)", [now], ) .unwrap(); + // System note with no cross-ref pattern conn.execute( "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at) VALUES (43, 4003, 30, 1, 1, 'added label ~bug', ?1, ?1, ?1)", @@ -232,9 +356,10 @@ fn seed_test_data(conn: &Connection) -> i64 { ) .unwrap(); + // System note: cross-project ref conn.execute( "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at) - VALUES (44, 4004, 30, 1, 1, 'mentioned in other/project#999', ?1, ?1, ?1)", + VALUES (44, 4004, 30, 1, 1, 'mentioned in issue other/project#999', ?1, ?1, ?1)", [now], ) .unwrap(); @@ -323,3 +448,323 @@ fn test_extract_refs_empty_project() { assert_eq!(result.skipped_unresolvable, 0); assert_eq!(result.parse_failures, 0); } + +// --- Integration tests: description extraction --- + +#[test] +fn test_extract_refs_from_descriptions_issue() { + let conn = setup_test_db(); + let now = now_ms(); + + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)", + [now], + ) + .unwrap(); + + // Issue with MR reference in description + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at) + VALUES (10, 1000, 1, 3537, 'Test Issue', 'opened', + 'Related to https://gitlab.com/vs/typescript-code/-/merge_requests/3548', + ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + // The target MR so it resolves + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at) + VALUES (20, 2000, 1, 3548, 'Fix MR', 'merged', 'fix', 'main', 'dev', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + let result = extract_refs_from_descriptions(&conn, 1).unwrap(); + + assert_eq!(result.inserted, 1, "Should insert 1 description ref"); + assert_eq!(result.skipped_unresolvable, 0); + + let method: String = conn + .query_row( + "SELECT source_method FROM entity_references WHERE project_id = 1", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(method, "description_parse"); +} + +#[test] +fn test_extract_refs_from_descriptions_mr() { + let conn = setup_test_db(); + let now = now_ms(); + + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) + VALUES (10, 1000, 1, 100, 'Target Issue', 'opened', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, description, created_at, updated_at, last_seen_at) + VALUES (20, 2000, 1, 200, 'Fixing MR', 'merged', 'fix', 'main', 'dev', + 'Fixes https://gitlab.com/vs/typescript-code/-/issues/100', + ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + let result = extract_refs_from_descriptions(&conn, 1).unwrap(); + + assert_eq!(result.inserted, 1); + + let (src_type, tgt_type): (String, String) = conn + .query_row( + "SELECT source_entity_type, target_entity_type FROM entity_references WHERE project_id = 1", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .unwrap(); + assert_eq!(src_type, "merge_request"); + assert_eq!(tgt_type, "issue"); +} + +#[test] +fn test_extract_refs_from_descriptions_idempotent() { + let conn = setup_test_db(); + let now = now_ms(); + + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at) + VALUES (10, 1000, 1, 1, 'Issue', 'opened', + 'See https://gitlab.com/vs/code/-/merge_requests/2', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at) + VALUES (20, 2000, 1, 2, 'MR', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + let r1 = extract_refs_from_descriptions(&conn, 1).unwrap(); + assert_eq!(r1.inserted, 1); + + let r2 = extract_refs_from_descriptions(&conn, 1).unwrap(); + assert_eq!(r2.inserted, 0, "Second run should insert 0 (idempotent)"); +} + +#[test] +fn test_extract_refs_from_descriptions_cross_project_unresolved() { + let conn = setup_test_db(); + let now = now_ms(); + + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (1, 100, 'vs/typescript-code', 'https://gitlab.com/vs/typescript-code', ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, description, created_at, updated_at, last_seen_at) + VALUES (10, 1000, 1, 1, 'Issue', 'opened', + 'See https://gitlab.com/vs/other-project/-/merge_requests/99', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + let result = extract_refs_from_descriptions(&conn, 1).unwrap(); + + assert_eq!(result.inserted, 0); + assert_eq!( + result.skipped_unresolvable, 1, + "Cross-project ref with no matching project should be unresolvable" + ); + + let (path, iid): (String, i64) = conn + .query_row( + "SELECT target_project_path, target_entity_iid FROM entity_references WHERE target_entity_id IS NULL", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .unwrap(); + assert_eq!(path, "vs/other-project"); + assert_eq!(iid, 99); +} + +// --- Integration tests: user note extraction --- + +#[test] +fn test_extract_refs_from_user_notes_with_url() { + let conn = setup_test_db(); + let now = now_ms(); + + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) + VALUES (10, 1000, 1, 50, 'Source Issue', 'opened', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at) + VALUES (20, 2000, 1, 60, 'Target MR', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) + VALUES (30, 'disc-user', 1, 10, 'Issue', ?1)", + [now], + ) + .unwrap(); + + // User note with a URL + conn.execute( + "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at) + VALUES (40, 4000, 30, 1, 0, + 'This is related to https://gitlab.com/vs/code/-/merge_requests/60', + ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + let result = extract_refs_from_user_notes(&conn, 1).unwrap(); + + assert_eq!(result.inserted, 1); + + let method: String = conn + .query_row( + "SELECT source_method FROM entity_references WHERE project_id = 1", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(method, "note_parse"); +} + +#[test] +fn test_extract_refs_from_user_notes_no_system_note_patterns() { + let conn = setup_test_db(); + let now = now_ms(); + + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) + VALUES (10, 1000, 1, 50, 'Source', 'opened', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at) + VALUES (20, 2000, 1, 999, 'Target', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) + VALUES (30, 'disc-x', 1, 10, 'Issue', ?1)", + [now], + ) + .unwrap(); + + // User note with system-note-like text but no URL — should NOT extract + // (user notes only use URL parsing, not system note pattern matching) + conn.execute( + "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at) + VALUES (40, 4000, 30, 1, 0, 'mentioned in merge request !999', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + let result = extract_refs_from_user_notes(&conn, 1).unwrap(); + + assert_eq!( + result.inserted, 0, + "User notes should only parse URLs, not system note patterns" + ); +} + +#[test] +fn test_extract_refs_from_user_notes_idempotent() { + let conn = setup_test_db(); + let now = now_ms(); + + conn.execute( + "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url, created_at, updated_at) + VALUES (1, 100, 'vs/code', 'https://gitlab.com/vs/code', ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) + VALUES (10, 1000, 1, 1, 'Src', 'opened', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, state, source_branch, target_branch, author_username, created_at, updated_at, last_seen_at) + VALUES (20, 2000, 1, 2, 'Tgt', 'opened', 'x', 'main', 'dev', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) + VALUES (30, 'disc-y', 1, 10, 'Issue', ?1)", + [now], + ) + .unwrap(); + + conn.execute( + "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, is_system, body, created_at, updated_at, last_seen_at) + VALUES (40, 4000, 30, 1, 0, + 'See https://gitlab.com/vs/code/-/merge_requests/2', ?1, ?1, ?1)", + [now], + ) + .unwrap(); + + let r1 = extract_refs_from_user_notes(&conn, 1).unwrap(); + assert_eq!(r1.inserted, 1); + + let r2 = extract_refs_from_user_notes(&conn, 1).unwrap(); + assert_eq!(r2.inserted, 0, "Second extraction should be idempotent"); +} diff --git a/src/ingestion/orchestrator.rs b/src/ingestion/orchestrator.rs index aadd92e..c1c68fb 100644 --- a/src/ingestion/orchestrator.rs +++ b/src/ingestion/orchestrator.rs @@ -640,6 +640,24 @@ pub async fn ingest_project_merge_requests_with_progress( ); } + let desc_refs = crate::core::note_parser::extract_refs_from_descriptions(conn, project_id)?; + if desc_refs.inserted > 0 || desc_refs.skipped_unresolvable > 0 { + debug!( + inserted = desc_refs.inserted, + unresolvable = desc_refs.skipped_unresolvable, + "Extracted cross-references from descriptions" + ); + } + + let user_note_refs = crate::core::note_parser::extract_refs_from_user_notes(conn, project_id)?; + if user_note_refs.inserted > 0 || user_note_refs.skipped_unresolvable > 0 { + debug!( + inserted = user_note_refs.inserted, + unresolvable = user_note_refs.skipped_unresolvable, + "Extracted cross-references from user notes" + ); + } + { let enqueued = enqueue_mr_closes_issues_jobs(conn, project_id)?; if enqueued > 0 {