use chrono::DateTime;
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::BTreeSet;

use super::truncation::{
    MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
};
use crate::core::error::Result;

/// Source type for documents.
///
/// Serialized by serde in snake_case, which yields the same strings as
/// [`SourceType::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SourceType {
    Issue,
    MergeRequest,
    Discussion,
}

impl SourceType {
    /// Canonical snake_case name of the source type.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Issue => "issue",
            Self::MergeRequest => "merge_request",
            Self::Discussion => "discussion",
        }
    }

    /// Parse from CLI input, accepting common aliases.
    ///
    /// Matching is case-insensitive. Returns `None` for unrecognized input.
    ///
    /// Accepts: "issue", "issues", "mr", "mrs", "merge_request", "merge_requests",
    /// "discussion", "discussions"
    pub fn parse(s: &str) -> Option<Self> {
        match s.to_lowercase().as_str() {
            "issue" | "issues" => Some(Self::Issue),
            "mr" | "mrs" | "merge_request" | "merge_requests" => Some(Self::MergeRequest),
            "discussion" | "discussions" => Some(Self::Discussion),
            _ => None,
        }
    }
}

impl std::fmt::Display for SourceType {
    /// Writes the same string as [`SourceType::as_str`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.as_str())
    }
}

/// Generated document ready for storage.
#[derive(Debug, Clone)]
pub struct DocumentData {
    pub source_type: SourceType,
    pub source_id: i64,
    pub project_id: i64,
    pub author_username: Option<String>,
    pub labels: Vec<String>,
    pub paths: Vec<String>,
    pub labels_hash: String,
    pub paths_hash: String,
    pub created_at: i64,
    pub updated_at: i64,
    pub url: Option<String>,
    pub title: Option<String>,
    pub content_text: String,
    pub content_hash: String,
    pub is_truncated: bool,
    pub truncated_reason: Option<String>,
}

/// Compute SHA-256 hash of content.
///
/// Returns the digest as a 64-character lowercase hex string.
pub fn compute_content_hash(content: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(content.as_bytes());
    format!("{:x}", hasher.finalize())
}

/// Compute SHA-256 hash over a sorted list of strings.
/// Used for labels_hash and paths_hash to detect changes efficiently.
/// Sorts by index reference to avoid cloning, hashes incrementally to avoid join allocation.
///
/// The digest is order-independent: items are hashed in sorted order with a
/// single `\n` between consecutive items. An empty list hashes like empty input.
pub fn compute_list_hash(items: &[String]) -> String {
    let mut indices: Vec<usize> = (0..items.len()).collect();
    // Equal strings contribute identical bytes, so an unstable sort cannot
    // change the digest and avoids the stable sort's scratch allocation.
    indices.sort_unstable_by(|&a, &b| items[a].cmp(&items[b]));

    let mut hasher = Sha256::new();
    for (i, &idx) in indices.iter().enumerate() {
        if i > 0 {
            hasher.update(b"\n");
        }
        hasher.update(items[idx].as_bytes());
    }
    format!("{:x}", hasher.finalize())
}

/// Extract a searchable document from an issue.
/// Returns None if the issue has been deleted from the DB.
///
/// Any database error other than "no rows" on the issue lookup is propagated.
pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option<DocumentData>> {
    // Query main issue entity with project info
    let row = conn.query_row(
        "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,
                i.created_at, i.updated_at, i.web_url,
                p.path_with_namespace, p.id AS project_id
         FROM issues i
         JOIN projects p ON p.id = i.project_id
         WHERE i.id = ?1",
        rusqlite::params![issue_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,            // id
                row.get::<_, i64>(1)?,            // iid
                row.get::<_, Option<String>>(2)?, // title
                row.get::<_, Option<String>>(3)?, // description
                row.get::<_, String>(4)?,         // state
                row.get::<_, Option<String>>(5)?, // author_username
                row.get::<_, i64>(6)?,            // created_at
                row.get::<_, i64>(7)?,            // updated_at
                row.get::<_, Option<String>>(8)?, // web_url
                row.get::<_, String>(9)?,         // path_with_namespace
                row.get::<_, i64>(10)?,           // project_id
            ))
        },
    );

    let (
        id,
        iid,
        title,
        description,
        state,
        author_username,
        created_at,
        updated_at,
        web_url,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        // A missing row means the issue is gone; that is not an error.
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };

    // Query labels via junction table
    let mut label_stmt = conn.prepare_cached(
        "SELECT l.name FROM issue_labels il
         JOIN labels l ON l.id = il.label_id
         WHERE il.issue_id = ?1
         ORDER BY l.name",
    )?;
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    // Build labels JSON array string
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());

    // Format content_text per PRD template
    let display_title = title.as_deref().unwrap_or("(untitled)");
    let mut content = format!(
        "[[Issue]] #{}: {}\nProject: {}\n",
        iid, display_title, path_with_namespace
    );
    if let Some(ref url) = web_url {
        content.push_str(&format!("URL: {}\n", url));
    }
    content.push_str(&format!("Labels: {}\n", labels_json));
    content.push_str(&format!("State: {}\n", state));
    if let Some(ref author) = author_username {
        content.push_str(&format!("Author: @{}\n", author));
    }

    // Add description section only if description is Some
    // (an empty-string description still emits the section header).
    if let Some(ref desc) = description {
        content.push_str("\n--- Description ---\n\n");
        content.push_str(desc);
    }

    let labels_hash = compute_list_hash(&labels);
    let paths_hash = compute_list_hash(&[]); // Issues have no paths

    // Apply hard cap truncation for safety, then hash the final stored content
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);

    Ok(Some(DocumentData {
        source_type: SourceType::Issue,
        source_id: id,
        project_id,
        author_username,
        labels,
        paths: Vec::new(),
        labels_hash,
        paths_hash,
        created_at,
        updated_at,
        url: web_url,
        title: Some(display_title.to_string()),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()),
    }))
}

/// Extract a searchable document from a merge request.
/// Returns None if the MR has been deleted from the DB.
pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result> { let row = conn.query_row( "SELECT m.id, m.iid, m.title, m.description, m.state, m.author_username, m.source_branch, m.target_branch, m.created_at, m.updated_at, m.web_url, p.path_with_namespace, p.id AS project_id FROM merge_requests m JOIN projects p ON p.id = m.project_id WHERE m.id = ?1", rusqlite::params![mr_id], |row| { Ok(( row.get::<_, i64>(0)?, // id row.get::<_, i64>(1)?, // iid row.get::<_, Option>(2)?, // title row.get::<_, Option>(3)?, // description row.get::<_, Option>(4)?, // state row.get::<_, Option>(5)?, // author_username row.get::<_, Option>(6)?, // source_branch row.get::<_, Option>(7)?, // target_branch row.get::<_, Option>(8)?, // created_at (nullable in schema) row.get::<_, Option>(9)?, // updated_at (nullable in schema) row.get::<_, Option>(10)?, // web_url row.get::<_, String>(11)?, // path_with_namespace row.get::<_, i64>(12)?, // project_id )) }, ); let ( id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, web_url, path_with_namespace, project_id, ) = match row { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; // Query labels via junction table let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM mr_labels ml JOIN labels l ON l.id = ml.label_id WHERE ml.merge_request_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![id], |row| row.get(0))? 
.collect::, _>>()?; let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string()); let display_title = title.as_deref().unwrap_or("(untitled)"); let display_state = state.as_deref().unwrap_or("unknown"); let mut content = format!( "[[MergeRequest]] !{}: {}\nProject: {}\n", iid, display_title, path_with_namespace ); if let Some(ref url) = web_url { content.push_str(&format!("URL: {}\n", url)); } content.push_str(&format!("Labels: {}\n", labels_json)); content.push_str(&format!("State: {}\n", display_state)); if let Some(ref author) = author_username { content.push_str(&format!("Author: @{}\n", author)); } // Source line: source_branch -> target_branch if let (Some(src), Some(tgt)) = (&source_branch, &target_branch) { content.push_str(&format!("Source: {} -> {}\n", src, tgt)); } if let Some(ref desc) = description { content.push_str("\n--- Description ---\n\n"); content.push_str(desc); } let labels_hash = compute_list_hash(&labels); let paths_hash = compute_list_hash(&[]); // Apply hard cap truncation for safety, then hash the final stored content let hard_cap = truncate_hard_cap(&content); let content_hash = compute_content_hash(&hard_cap.content); Ok(Some(DocumentData { source_type: SourceType::MergeRequest, source_id: id, project_id, author_username, labels, paths: Vec::new(), labels_hash, paths_hash, created_at: created_at.unwrap_or(0), updated_at: updated_at.unwrap_or(0), url: web_url, title: Some(display_title.to_string()), content_text: hard_cap.content, content_hash, is_truncated: hard_cap.is_truncated, truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()), })) } /// Format ms epoch as YYYY-MM-DD date string. fn format_date(ms: i64) -> String { DateTime::from_timestamp_millis(ms) .map(|dt| dt.format("%Y-%m-%d").to_string()) .unwrap_or_else(|| "unknown".to_string()) } /// Extract a searchable document from a discussion thread. /// Returns None if the discussion or its parent has been deleted. 
pub fn extract_discussion_document( conn: &Connection, discussion_id: i64, ) -> Result> { // Query discussion metadata let disc_row = conn.query_row( "SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, p.path_with_namespace, p.id AS project_id FROM discussions d JOIN projects p ON p.id = d.project_id WHERE d.id = ?1", rusqlite::params![discussion_id], |row| { Ok(( row.get::<_, i64>(0)?, // id row.get::<_, String>(1)?, // noteable_type row.get::<_, Option>(2)?, // issue_id row.get::<_, Option>(3)?, // merge_request_id row.get::<_, String>(4)?, // path_with_namespace row.get::<_, i64>(5)?, // project_id )) }, ); let (id, noteable_type, issue_id, merge_request_id, path_with_namespace, project_id) = match disc_row { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; // Query parent entity let (_parent_iid, parent_title, parent_web_url, parent_type_prefix, labels) = match noteable_type.as_str() { "Issue" => { let parent_id = match issue_id { Some(pid) => pid, None => return Ok(None), }; let parent = conn.query_row( "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1", rusqlite::params![parent_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, Option>(1)?, row.get::<_, Option>(2)?, )) }, ); let (iid, title, web_url) = match parent { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; // Query parent labels let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM issue_labels il JOIN labels l ON l.id = il.label_id WHERE il.issue_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![parent_id], |row| row.get(0))? 
.collect::, _>>()?; (iid, title, web_url, format!("Issue #{}", iid), labels) } "MergeRequest" => { let parent_id = match merge_request_id { Some(pid) => pid, None => return Ok(None), }; let parent = conn.query_row( "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1", rusqlite::params![parent_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, Option>(1)?, row.get::<_, Option>(2)?, )) }, ); let (iid, title, web_url) = match parent { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; // Query parent labels let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM mr_labels ml JOIN labels l ON l.id = ml.label_id WHERE ml.merge_request_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![parent_id], |row| row.get(0))? .collect::, _>>()?; (iid, title, web_url, format!("MR !{}", iid), labels) } _ => return Ok(None), }; // Query non-system notes in thread order let mut note_stmt = conn.prepare_cached( "SELECT n.author_username, n.body, n.created_at, n.gitlab_id, n.note_type, n.position_old_path, n.position_new_path FROM notes n WHERE n.discussion_id = ?1 AND n.is_system = 0 ORDER BY n.created_at ASC, n.id ASC", )?; struct NoteRow { author: Option, body: Option, created_at: i64, gitlab_id: i64, old_path: Option, new_path: Option, } let notes: Vec = note_stmt .query_map(rusqlite::params![id], |row| { Ok(NoteRow { author: row.get(0)?, body: row.get(1)?, created_at: row.get(2)?, gitlab_id: row.get(3)?, // index 4 is note_type (unused here) old_path: row.get(5)?, new_path: row.get(6)?, }) })? 
.collect::, _>>()?; if notes.is_empty() { return Ok(None); } // Extract DiffNote paths (deduplicated, sorted) let mut path_set = BTreeSet::new(); for note in ¬es { if let Some(ref p) = note.old_path && !p.is_empty() { path_set.insert(p.clone()); } if let Some(ref p) = note.new_path && !p.is_empty() { path_set.insert(p.clone()); } } let paths: Vec = path_set.into_iter().collect(); // Construct URL: parent_web_url#note_{first_note_gitlab_id} let first_note_gitlab_id = notes[0].gitlab_id; let url = parent_web_url .as_ref() .map(|wu| format!("{}#note_{}", wu, first_note_gitlab_id)); // First non-system note author let author_username = notes[0].author.clone(); // Build content let display_title = parent_title.as_deref().unwrap_or("(untitled)"); let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string()); let paths_json = serde_json::to_string(&paths).unwrap_or_else(|_| "[]".to_string()); let mut content = format!( "[[Discussion]] {}: {}\nProject: {}\n", parent_type_prefix, display_title, path_with_namespace ); if let Some(ref u) = url { content.push_str(&format!("URL: {}\n", u)); } content.push_str(&format!("Labels: {}\n", labels_json)); if !paths.is_empty() { content.push_str(&format!("Files: {}\n", paths_json)); } // Build NoteContent list for truncation-aware thread rendering let note_contents: Vec = notes .iter() .map(|note| NoteContent { author: note.author.as_deref().unwrap_or("unknown").to_string(), date: format_date(note.created_at), body: note.body.as_deref().unwrap_or("").to_string(), }) .collect(); // Estimate header size to reserve budget for thread content let header_len = content.len() + "\n--- Thread ---\n\n".len(); let thread_budget = MAX_DISCUSSION_BYTES.saturating_sub(header_len); let thread_result = truncate_discussion(¬e_contents, thread_budget); content.push_str("\n--- Thread ---\n\n"); content.push_str(&thread_result.content); // Use first note's created_at and last note's created_at for timestamps let created_at = 
notes[0].created_at; let updated_at = notes.last().map(|n| n.created_at).unwrap_or(created_at); let content_hash = compute_content_hash(&content); let labels_hash = compute_list_hash(&labels); let paths_hash = compute_list_hash(&paths); Ok(Some(DocumentData { source_type: SourceType::Discussion, source_id: id, project_id, author_username, labels, paths, labels_hash, paths_hash, created_at, updated_at, url, title: None, // Discussions don't have their own title content_text: content, content_hash, is_truncated: thread_result.is_truncated, truncated_reason: thread_result.reason.map(|r| r.as_str().to_string()), })) } #[cfg(test)] mod tests { use super::*; #[test] fn test_source_type_parse_aliases() { assert_eq!(SourceType::parse("issue"), Some(SourceType::Issue)); assert_eq!(SourceType::parse("issues"), Some(SourceType::Issue)); assert_eq!(SourceType::parse("mr"), Some(SourceType::MergeRequest)); assert_eq!(SourceType::parse("mrs"), Some(SourceType::MergeRequest)); assert_eq!( SourceType::parse("merge_request"), Some(SourceType::MergeRequest) ); assert_eq!( SourceType::parse("merge_requests"), Some(SourceType::MergeRequest) ); assert_eq!( SourceType::parse("discussion"), Some(SourceType::Discussion) ); assert_eq!( SourceType::parse("discussions"), Some(SourceType::Discussion) ); assert_eq!(SourceType::parse("invalid"), None); assert_eq!(SourceType::parse("ISSUE"), Some(SourceType::Issue)); // case insensitive } #[test] fn test_source_type_as_str() { assert_eq!(SourceType::Issue.as_str(), "issue"); assert_eq!(SourceType::MergeRequest.as_str(), "merge_request"); assert_eq!(SourceType::Discussion.as_str(), "discussion"); } #[test] fn test_source_type_display() { assert_eq!(format!("{}", SourceType::Issue), "issue"); assert_eq!(format!("{}", SourceType::MergeRequest), "merge_request"); assert_eq!(format!("{}", SourceType::Discussion), "discussion"); } #[test] fn test_content_hash_deterministic() { let hash1 = compute_content_hash("hello"); let hash2 = 
compute_content_hash("hello"); assert_eq!(hash1, hash2); assert!(!hash1.is_empty()); // SHA-256 of "hello" is known assert_eq!(hash1.len(), 64); // 256 bits = 64 hex chars } #[test] fn test_content_hash_different_inputs() { let hash1 = compute_content_hash("hello"); let hash2 = compute_content_hash("world"); assert_ne!(hash1, hash2); } #[test] fn test_content_hash_empty() { let hash = compute_content_hash(""); assert_eq!(hash.len(), 64); } #[test] fn test_list_hash_order_independent() { let hash1 = compute_list_hash(&["b".to_string(), "a".to_string()]); let hash2 = compute_list_hash(&["a".to_string(), "b".to_string()]); assert_eq!(hash1, hash2); } #[test] fn test_list_hash_empty() { let hash = compute_list_hash(&[]); assert_eq!(hash.len(), 64); // Empty list hashes consistently let hash2 = compute_list_hash(&[]); assert_eq!(hash, hash2); } // Helper to create an in-memory DB with the required tables for extraction tests fn setup_test_db() -> Connection { let conn = Connection::open_in_memory().unwrap(); conn.execute_batch( " CREATE TABLE projects ( id INTEGER PRIMARY KEY, gitlab_project_id INTEGER UNIQUE NOT NULL, path_with_namespace TEXT NOT NULL, default_branch TEXT, web_url TEXT, created_at INTEGER, updated_at INTEGER, raw_payload_id INTEGER ); CREATE TABLE issues ( id INTEGER PRIMARY KEY, gitlab_id INTEGER UNIQUE NOT NULL, project_id INTEGER NOT NULL REFERENCES projects(id), iid INTEGER NOT NULL, title TEXT, description TEXT, state TEXT NOT NULL, author_username TEXT, created_at INTEGER NOT NULL, updated_at INTEGER NOT NULL, last_seen_at INTEGER NOT NULL, discussions_synced_for_updated_at INTEGER, resource_events_synced_for_updated_at INTEGER, web_url TEXT, raw_payload_id INTEGER ); CREATE TABLE labels ( id INTEGER PRIMARY KEY, gitlab_id INTEGER, project_id INTEGER NOT NULL REFERENCES projects(id), name TEXT NOT NULL, color TEXT, description TEXT ); CREATE TABLE issue_labels ( issue_id INTEGER NOT NULL REFERENCES issues(id), label_id INTEGER NOT NULL REFERENCES 
labels(id), PRIMARY KEY(issue_id, label_id) ); ", ) .unwrap(); // Insert a test project conn.execute( "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project-one', 'https://gitlab.example.com/group/project-one')", [], ).unwrap(); conn } #[allow(clippy::too_many_arguments)] fn insert_issue( conn: &Connection, id: i64, iid: i64, title: Option<&str>, description: Option<&str>, state: &str, author: Option<&str>, web_url: Option<&str>, ) { conn.execute( "INSERT INTO issues (id, gitlab_id, project_id, iid, title, description, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, 1000, 2000, 3000, ?8)", rusqlite::params![id, id * 10, iid, title, description, state, author, web_url], ).unwrap(); } fn insert_label(conn: &Connection, id: i64, name: &str) { conn.execute( "INSERT INTO labels (id, project_id, name) VALUES (?1, 1, ?2)", rusqlite::params![id, name], ) .unwrap(); } fn link_issue_label(conn: &Connection, issue_id: i64, label_id: i64) { conn.execute( "INSERT INTO issue_labels (issue_id, label_id) VALUES (?1, ?2)", rusqlite::params![issue_id, label_id], ) .unwrap(); } #[test] fn test_issue_document_format() { let conn = setup_test_db(); insert_issue( &conn, 1, 234, Some("Authentication redesign"), Some("We need to modernize our authentication system..."), "opened", Some("johndoe"), Some("https://gitlab.example.com/group/project-one/-/issues/234"), ); insert_label(&conn, 1, "auth"); insert_label(&conn, 2, "bug"); link_issue_label(&conn, 1, 1); link_issue_label(&conn, 1, 2); let doc = extract_issue_document(&conn, 1).unwrap().unwrap(); assert_eq!(doc.source_type, SourceType::Issue); assert_eq!(doc.source_id, 1); assert_eq!(doc.project_id, 1); assert_eq!(doc.author_username, Some("johndoe".to_string())); assert!( doc.content_text .starts_with("[[Issue]] #234: Authentication redesign\n") ); assert!(doc.content_text.contains("Project: group/project-one\n")); assert!( 
doc.content_text .contains("URL: https://gitlab.example.com/group/project-one/-/issues/234\n") ); assert!(doc.content_text.contains("Labels: [\"auth\",\"bug\"]\n")); assert!(doc.content_text.contains("State: opened\n")); assert!(doc.content_text.contains("Author: @johndoe\n")); assert!( doc.content_text.contains( "--- Description ---\n\nWe need to modernize our authentication system..." ) ); assert!(!doc.is_truncated); assert!(doc.paths.is_empty()); } #[test] fn test_issue_not_found() { let conn = setup_test_db(); let result = extract_issue_document(&conn, 999).unwrap(); assert!(result.is_none()); } #[test] fn test_issue_no_description() { let conn = setup_test_db(); insert_issue( &conn, 1, 10, Some("Quick fix"), None, "opened", Some("alice"), None, ); let doc = extract_issue_document(&conn, 1).unwrap().unwrap(); assert!(!doc.content_text.contains("--- Description ---")); assert!(doc.content_text.contains("[[Issue]] #10: Quick fix\n")); } #[test] fn test_issue_labels_sorted() { let conn = setup_test_db(); insert_issue( &conn, 1, 10, Some("Test"), Some("Body"), "opened", Some("bob"), None, ); insert_label(&conn, 1, "zeta"); insert_label(&conn, 2, "alpha"); insert_label(&conn, 3, "middle"); link_issue_label(&conn, 1, 1); link_issue_label(&conn, 1, 2); link_issue_label(&conn, 1, 3); let doc = extract_issue_document(&conn, 1).unwrap().unwrap(); assert_eq!(doc.labels, vec!["alpha", "middle", "zeta"]); assert!( doc.content_text .contains("Labels: [\"alpha\",\"middle\",\"zeta\"]") ); } #[test] fn test_issue_no_labels() { let conn = setup_test_db(); insert_issue( &conn, 1, 10, Some("Test"), Some("Body"), "opened", None, None, ); let doc = extract_issue_document(&conn, 1).unwrap().unwrap(); assert!(doc.labels.is_empty()); assert!(doc.content_text.contains("Labels: []\n")); } #[test] fn test_issue_hash_deterministic() { let conn = setup_test_db(); insert_issue( &conn, 1, 10, Some("Test"), Some("Body"), "opened", Some("alice"), None, ); let doc1 = 
extract_issue_document(&conn, 1).unwrap().unwrap(); let doc2 = extract_issue_document(&conn, 1).unwrap().unwrap(); assert_eq!(doc1.content_hash, doc2.content_hash); assert_eq!(doc1.labels_hash, doc2.labels_hash); assert_eq!(doc1.content_hash.len(), 64); } #[test] fn test_issue_empty_description() { let conn = setup_test_db(); insert_issue(&conn, 1, 10, Some("Test"), Some(""), "opened", None, None); let doc = extract_issue_document(&conn, 1).unwrap().unwrap(); // Empty string description still includes the section header assert!(doc.content_text.contains("--- Description ---\n\n")); } // --- MR extraction tests --- fn setup_mr_test_db() -> Connection { let conn = setup_test_db(); conn.execute_batch( " CREATE TABLE merge_requests ( id INTEGER PRIMARY KEY, gitlab_id INTEGER UNIQUE NOT NULL, project_id INTEGER NOT NULL REFERENCES projects(id), iid INTEGER NOT NULL, title TEXT, description TEXT, state TEXT, draft INTEGER NOT NULL DEFAULT 0, author_username TEXT, source_branch TEXT, target_branch TEXT, head_sha TEXT, references_short TEXT, references_full TEXT, detailed_merge_status TEXT, merge_user_username TEXT, created_at INTEGER, updated_at INTEGER, merged_at INTEGER, closed_at INTEGER, last_seen_at INTEGER NOT NULL, discussions_synced_for_updated_at INTEGER, discussions_sync_last_attempt_at INTEGER, discussions_sync_attempts INTEGER DEFAULT 0, discussions_sync_last_error TEXT, resource_events_synced_for_updated_at INTEGER, web_url TEXT, raw_payload_id INTEGER ); CREATE TABLE mr_labels ( merge_request_id INTEGER REFERENCES merge_requests(id), label_id INTEGER REFERENCES labels(id), PRIMARY KEY(merge_request_id, label_id) ); ", ) .unwrap(); conn } #[allow(clippy::too_many_arguments)] fn insert_mr( conn: &Connection, id: i64, iid: i64, title: Option<&str>, description: Option<&str>, state: Option<&str>, author: Option<&str>, source_branch: Option<&str>, target_branch: Option<&str>, web_url: Option<&str>, ) { conn.execute( "INSERT INTO merge_requests (id, gitlab_id, 
project_id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, ?8, ?9, 1000, 2000, 3000, ?10)", rusqlite::params![id, id * 10, iid, title, description, state, author, source_branch, target_branch, web_url], ).unwrap(); } fn link_mr_label(conn: &Connection, mr_id: i64, label_id: i64) { conn.execute( "INSERT INTO mr_labels (merge_request_id, label_id) VALUES (?1, ?2)", rusqlite::params![mr_id, label_id], ) .unwrap(); } #[test] fn test_mr_document_format() { let conn = setup_mr_test_db(); insert_mr( &conn, 1, 456, Some("Implement JWT authentication"), Some("This MR implements JWT-based authentication..."), Some("opened"), Some("johndoe"), Some("feature/jwt-auth"), Some("main"), Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"), ); insert_label(&conn, 1, "auth"); insert_label(&conn, 2, "feature"); link_mr_label(&conn, 1, 1); link_mr_label(&conn, 1, 2); let doc = extract_mr_document(&conn, 1).unwrap().unwrap(); assert_eq!(doc.source_type, SourceType::MergeRequest); assert_eq!(doc.source_id, 1); assert!( doc.content_text .starts_with("[[MergeRequest]] !456: Implement JWT authentication\n") ); assert!(doc.content_text.contains("Project: group/project-one\n")); assert!( doc.content_text .contains("Labels: [\"auth\",\"feature\"]\n") ); assert!(doc.content_text.contains("State: opened\n")); assert!(doc.content_text.contains("Author: @johndoe\n")); assert!( doc.content_text .contains("Source: feature/jwt-auth -> main\n") ); assert!( doc.content_text .contains("--- Description ---\n\nThis MR implements JWT-based authentication...") ); } #[test] fn test_mr_not_found() { let conn = setup_mr_test_db(); let result = extract_mr_document(&conn, 999).unwrap(); assert!(result.is_none()); } #[test] fn test_mr_no_description() { let conn = setup_mr_test_db(); insert_mr( &conn, 1, 10, Some("Quick fix"), None, Some("merged"), Some("alice"), 
Some("fix/bug"), Some("main"), None, ); let doc = extract_mr_document(&conn, 1).unwrap().unwrap(); assert!(!doc.content_text.contains("--- Description ---")); assert!( doc.content_text .contains("[[MergeRequest]] !10: Quick fix\n") ); } #[test] fn test_mr_branch_info() { let conn = setup_mr_test_db(); insert_mr( &conn, 1, 10, Some("Test"), Some("Body"), Some("opened"), None, Some("feature/foo"), Some("develop"), None, ); let doc = extract_mr_document(&conn, 1).unwrap().unwrap(); assert!( doc.content_text .contains("Source: feature/foo -> develop\n") ); } #[test] fn test_mr_no_branches() { let conn = setup_mr_test_db(); insert_mr( &conn, 1, 10, Some("Test"), None, Some("opened"), None, None, None, None, ); let doc = extract_mr_document(&conn, 1).unwrap().unwrap(); assert!(!doc.content_text.contains("Source:")); } // --- Discussion extraction tests --- fn setup_discussion_test_db() -> Connection { let conn = setup_mr_test_db(); // includes projects, issues schema, labels, mr tables conn.execute_batch( " CREATE TABLE discussions ( id INTEGER PRIMARY KEY, gitlab_discussion_id TEXT NOT NULL, project_id INTEGER NOT NULL REFERENCES projects(id), issue_id INTEGER REFERENCES issues(id), merge_request_id INTEGER, noteable_type TEXT NOT NULL, individual_note INTEGER NOT NULL DEFAULT 0, first_note_at INTEGER, last_note_at INTEGER, last_seen_at INTEGER NOT NULL, resolvable INTEGER NOT NULL DEFAULT 0, resolved INTEGER NOT NULL DEFAULT 0 ); CREATE TABLE notes ( id INTEGER PRIMARY KEY, gitlab_id INTEGER UNIQUE NOT NULL, discussion_id INTEGER NOT NULL REFERENCES discussions(id), project_id INTEGER NOT NULL REFERENCES projects(id), note_type TEXT, is_system INTEGER NOT NULL DEFAULT 0, author_username TEXT, body TEXT, created_at INTEGER NOT NULL, updated_at INTEGER NOT NULL, last_seen_at INTEGER NOT NULL, position INTEGER, resolvable INTEGER NOT NULL DEFAULT 0, resolved INTEGER NOT NULL DEFAULT 0, resolved_by TEXT, resolved_at INTEGER, position_old_path TEXT, position_new_path TEXT, 
position_old_line INTEGER, position_new_line INTEGER, raw_payload_id INTEGER ); ", ) .unwrap(); conn } fn insert_discussion( conn: &Connection, id: i64, noteable_type: &str, issue_id: Option, mr_id: Option, ) { conn.execute( "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, 1, ?3, ?4, ?5, 3000)", rusqlite::params![id, format!("disc_{}", id), issue_id, mr_id, noteable_type], ).unwrap(); } #[allow(clippy::too_many_arguments)] fn insert_note( conn: &Connection, id: i64, gitlab_id: i64, discussion_id: i64, author: Option<&str>, body: Option<&str>, created_at: i64, is_system: bool, old_path: Option<&str>, new_path: Option<&str>, ) { conn.execute( "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9)", rusqlite::params![id, gitlab_id, discussion_id, author, body, created_at, is_system as i32, old_path, new_path], ).unwrap(); } #[test] fn test_discussion_document_format() { let conn = setup_discussion_test_db(); insert_issue( &conn, 1, 234, Some("Authentication redesign"), Some("desc"), "opened", Some("johndoe"), Some("https://gitlab.example.com/group/project-one/-/issues/234"), ); insert_label(&conn, 1, "auth"); insert_label(&conn, 2, "bug"); link_issue_label(&conn, 1, 1); link_issue_label(&conn, 1, 2); insert_discussion(&conn, 1, "Issue", Some(1), None); // 1710460800000 = 2024-03-15T00:00:00Z insert_note( &conn, 1, 12345, 1, Some("johndoe"), Some("I think we should move to JWT-based auth..."), 1710460800000, false, None, None, ); insert_note( &conn, 2, 12346, 1, Some("janedoe"), Some("Agreed. 
What about refresh token strategy?"), 1710460800000, false, None, None, ); let doc = extract_discussion_document(&conn, 1).unwrap().unwrap(); assert_eq!(doc.source_type, SourceType::Discussion); assert!( doc.content_text .starts_with("[[Discussion]] Issue #234: Authentication redesign\n") ); assert!(doc.content_text.contains("Project: group/project-one\n")); assert!(doc.content_text.contains( "URL: https://gitlab.example.com/group/project-one/-/issues/234#note_12345\n" )); assert!(doc.content_text.contains("Labels: [\"auth\",\"bug\"]\n")); assert!(doc.content_text.contains("--- Thread ---")); assert!( doc.content_text .contains("@johndoe (2024-03-15):\nI think we should move to JWT-based auth...") ); assert!( doc.content_text .contains("@janedoe (2024-03-15):\nAgreed. What about refresh token strategy?") ); assert_eq!(doc.author_username, Some("johndoe".to_string())); assert!(doc.title.is_none()); // Discussions don't have their own title } #[test] fn test_discussion_not_found() { let conn = setup_discussion_test_db(); let result = extract_discussion_document(&conn, 999).unwrap(); assert!(result.is_none()); } #[test] fn test_discussion_parent_deleted() { let conn = setup_discussion_test_db(); // Insert issue, create discussion, then delete the issue insert_issue( &conn, 99, 10, Some("To be deleted"), None, "opened", None, None, ); insert_discussion(&conn, 1, "Issue", Some(99), None); insert_note( &conn, 1, 100, 1, Some("alice"), Some("Hello"), 1000, false, None, None, ); // Delete the parent issue — FK cascade won't delete discussion in test since // we used REFERENCES without ON DELETE CASCADE in test schema, so just delete from issues conn.execute("PRAGMA foreign_keys = OFF", []).unwrap(); conn.execute("DELETE FROM issues WHERE id = 99", []) .unwrap(); conn.execute("PRAGMA foreign_keys = ON", []).unwrap(); let result = extract_discussion_document(&conn, 1).unwrap(); assert!(result.is_none()); } #[test] fn test_discussion_system_notes_excluded() { let conn = 
setup_discussion_test_db(); insert_issue( &conn, 1, 10, Some("Test"), Some("desc"), "opened", Some("alice"), None, ); insert_discussion(&conn, 1, "Issue", Some(1), None); insert_note( &conn, 1, 100, 1, Some("alice"), Some("Real comment"), 1000, false, None, None, ); insert_note( &conn, 2, 101, 1, Some("bot"), Some("assigned to @alice"), 2000, true, None, None, ); insert_note( &conn, 3, 102, 1, Some("bob"), Some("Follow-up"), 3000, false, None, None, ); let doc = extract_discussion_document(&conn, 1).unwrap().unwrap(); assert!(doc.content_text.contains("@alice")); assert!(doc.content_text.contains("@bob")); assert!(!doc.content_text.contains("assigned to")); } #[test] fn test_discussion_diffnote_paths() { let conn = setup_discussion_test_db(); insert_issue( &conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None, ); insert_discussion(&conn, 1, "Issue", Some(1), None); insert_note( &conn, 1, 100, 1, Some("alice"), Some("Comment on code"), 1000, false, Some("src/old.rs"), Some("src/new.rs"), ); insert_note( &conn, 2, 101, 1, Some("bob"), Some("Reply"), 2000, false, Some("src/old.rs"), Some("src/new.rs"), ); let doc = extract_discussion_document(&conn, 1).unwrap().unwrap(); // Paths should be deduplicated and sorted assert_eq!(doc.paths, vec!["src/new.rs", "src/old.rs"]); assert!( doc.content_text .contains("Files: [\"src/new.rs\",\"src/old.rs\"]") ); } #[test] fn test_discussion_url_construction() { let conn = setup_discussion_test_db(); insert_issue( &conn, 1, 10, Some("Test"), Some("desc"), "opened", None, Some("https://gitlab.example.com/group/project-one/-/issues/10"), ); insert_discussion(&conn, 1, "Issue", Some(1), None); insert_note( &conn, 1, 54321, 1, Some("alice"), Some("Hello"), 1000, false, None, None, ); let doc = extract_discussion_document(&conn, 1).unwrap().unwrap(); assert_eq!( doc.url, Some("https://gitlab.example.com/group/project-one/-/issues/10#note_54321".to_string()) ); } #[test] fn test_discussion_uses_parent_labels() { let conn = 
setup_discussion_test_db(); insert_issue( &conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None, ); insert_label(&conn, 1, "backend"); insert_label(&conn, 2, "api"); link_issue_label(&conn, 1, 1); link_issue_label(&conn, 1, 2); insert_discussion(&conn, 1, "Issue", Some(1), None); insert_note( &conn, 1, 100, 1, Some("alice"), Some("Comment"), 1000, false, None, None, ); let doc = extract_discussion_document(&conn, 1).unwrap().unwrap(); assert_eq!(doc.labels, vec!["api", "backend"]); } #[test] fn test_discussion_on_mr() { let conn = setup_discussion_test_db(); insert_mr( &conn, 1, 456, Some("JWT Auth"), Some("desc"), Some("opened"), Some("johndoe"), Some("feature/jwt"), Some("main"), Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"), ); insert_discussion(&conn, 1, "MergeRequest", None, Some(1)); insert_note( &conn, 1, 100, 1, Some("alice"), Some("LGTM"), 1000, false, None, None, ); let doc = extract_discussion_document(&conn, 1).unwrap().unwrap(); assert!( doc.content_text .contains("[[Discussion]] MR !456: JWT Auth\n") ); } #[test] fn test_discussion_all_system_notes() { let conn = setup_discussion_test_db(); insert_issue( &conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None, ); insert_discussion(&conn, 1, "Issue", Some(1), None); insert_note( &conn, 1, 100, 1, Some("bot"), Some("assigned to @alice"), 1000, true, None, None, ); // All notes are system notes -> no content -> returns None let result = extract_discussion_document(&conn, 1).unwrap(); assert!(result.is_none()); } }