use chrono::DateTime; use rusqlite::Connection; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::collections::{BTreeSet, HashMap}; use std::fmt::Write as _; use super::truncation::{ MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap, }; use crate::core::error::Result; use crate::core::time::ms_to_iso; #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum SourceType { Issue, MergeRequest, Discussion, Note, } impl SourceType { pub fn as_str(&self) -> &'static str { match self { Self::Issue => "issue", Self::MergeRequest => "merge_request", Self::Discussion => "discussion", Self::Note => "note", } } pub fn parse(s: &str) -> Option { match s.to_lowercase().as_str() { "issue" | "issues" => Some(Self::Issue), "mr" | "mrs" | "merge_request" | "merge_requests" => Some(Self::MergeRequest), "discussion" | "discussions" => Some(Self::Discussion), "note" | "notes" => Some(Self::Note), _ => None, } } } impl std::fmt::Display for SourceType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.as_str()) } } #[derive(Debug, Clone)] pub struct DocumentData { pub source_type: SourceType, pub source_id: i64, pub project_id: i64, pub author_username: Option, pub labels: Vec, pub paths: Vec, pub labels_hash: String, pub paths_hash: String, pub created_at: i64, pub updated_at: i64, pub url: Option, pub title: Option, pub content_text: String, pub content_hash: String, pub is_truncated: bool, pub truncated_reason: Option, } pub fn compute_content_hash(content: &str) -> String { let mut hasher = Sha256::new(); hasher.update(content.as_bytes()); format!("{:x}", hasher.finalize()) } pub fn compute_list_hash(items: &[String]) -> String { let mut indices: Vec = (0..items.len()).collect(); indices.sort_by(|a, b| items[*a].cmp(&items[*b])); let mut hasher = Sha256::new(); for (i, &idx) in indices.iter().enumerate() { if i > 0 { hasher.update(b"\n"); } hasher.update(items[idx].as_bytes()); } format!("{:x}", hasher.finalize()) } pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result> { let row = conn.query_row( "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username, i.created_at, i.updated_at, i.web_url, p.path_with_namespace, p.id AS project_id FROM issues i JOIN projects p ON p.id = i.project_id WHERE i.id = ?1", rusqlite::params![issue_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, i64>(1)?, row.get::<_, Option>(2)?, row.get::<_, Option>(3)?, row.get::<_, String>(4)?, row.get::<_, Option>(5)?, row.get::<_, i64>(6)?, row.get::<_, i64>(7)?, row.get::<_, Option>(8)?, row.get::<_, String>(9)?, row.get::<_, i64>(10)?, )) }, ); let ( id, iid, title, description, state, author_username, created_at, updated_at, web_url, path_with_namespace, project_id, ) = match row { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM issue_labels il JOIN labels l ON l.id = il.label_id WHERE il.issue_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![id], |row| row.get(0))? .collect::, _>>()?; let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string()); let display_title = title.as_deref().unwrap_or("(untitled)"); let mut content = format!( "[[Issue]] #{}: {}\nProject: {}\n", iid, display_title, path_with_namespace ); if let Some(ref url) = web_url { let _ = writeln!(content, "URL: {}", url); } let _ = writeln!(content, "Labels: {}", labels_json); let _ = writeln!(content, "State: {}", state); if let Some(ref author) = author_username { let _ = writeln!(content, "Author: @{}", author); } if let Some(ref desc) = description { content.push_str("\n--- Description ---\n\n"); content.push_str(desc); } let labels_hash = compute_list_hash(&labels); let paths_hash = compute_list_hash(&[]); let hard_cap = truncate_hard_cap(&content); let content_hash = compute_content_hash(&hard_cap.content); Ok(Some(DocumentData { source_type: SourceType::Issue, source_id: id, project_id, author_username, labels, paths: Vec::new(), labels_hash, paths_hash, created_at, updated_at, url: web_url, title: Some(display_title.to_string()), content_text: hard_cap.content, content_hash, is_truncated: hard_cap.is_truncated, truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()), })) } pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result> { let row = conn.query_row( "SELECT m.id, m.iid, m.title, m.description, m.state, m.author_username, m.source_branch, m.target_branch, m.created_at, m.updated_at, m.web_url, p.path_with_namespace, p.id AS project_id FROM merge_requests m JOIN projects p ON p.id = m.project_id WHERE m.id = ?1", rusqlite::params![mr_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, i64>(1)?, row.get::<_, Option>(2)?, row.get::<_, Option>(3)?, row.get::<_, Option>(4)?, row.get::<_, Option>(5)?, row.get::<_, Option>(6)?, row.get::<_, Option>(7)?, row.get::<_, Option>(8)?, row.get::<_, Option>(9)?, row.get::<_, Option>(10)?, row.get::<_, String>(11)?, row.get::<_, i64>(12)?, )) }, ); let ( id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, web_url, path_with_namespace, project_id, ) = match row { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM mr_labels ml JOIN labels l ON l.id = ml.label_id WHERE ml.merge_request_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![id], |row| row.get(0))? .collect::, _>>()?; let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string()); let display_title = title.as_deref().unwrap_or("(untitled)"); let display_state = state.as_deref().unwrap_or("unknown"); let mut content = format!( "[[MergeRequest]] !{}: {}\nProject: {}\n", iid, display_title, path_with_namespace ); if let Some(ref url) = web_url { let _ = writeln!(content, "URL: {}", url); } let _ = writeln!(content, "Labels: {}", labels_json); let _ = writeln!(content, "State: {}", display_state); if let Some(ref author) = author_username { let _ = writeln!(content, "Author: @{}", author); } if let (Some(src), Some(tgt)) = (&source_branch, &target_branch) { let _ = writeln!(content, "Source: {} -> {}", src, tgt); } if let Some(ref desc) = description { content.push_str("\n--- Description ---\n\n"); content.push_str(desc); } let labels_hash = compute_list_hash(&labels); let paths_hash = compute_list_hash(&[]); let hard_cap = truncate_hard_cap(&content); let content_hash = compute_content_hash(&hard_cap.content); Ok(Some(DocumentData { source_type: SourceType::MergeRequest, source_id: id, project_id, author_username, labels, paths: Vec::new(), labels_hash, paths_hash, created_at: created_at.unwrap_or(0), updated_at: updated_at.unwrap_or(0), url: web_url, title: Some(display_title.to_string()), content_text: hard_cap.content, content_hash, is_truncated: hard_cap.is_truncated, truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()), })) } fn format_date(ms: i64) -> String { DateTime::from_timestamp_millis(ms) .map(|dt| dt.format("%Y-%m-%d").to_string()) .unwrap_or_else(|| "unknown".to_string()) } pub fn extract_discussion_document( conn: &Connection, discussion_id: i64, ) -> Result> { let disc_row = conn.query_row( "SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id, p.path_with_namespace, p.id AS project_id FROM discussions d JOIN projects p ON p.id = d.project_id WHERE d.id = ?1", rusqlite::params![discussion_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, String>(1)?, row.get::<_, Option>(2)?, row.get::<_, Option>(3)?, row.get::<_, String>(4)?, row.get::<_, i64>(5)?, )) }, ); let (id, noteable_type, issue_id, merge_request_id, path_with_namespace, project_id) = match disc_row { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let (_parent_iid, parent_title, parent_web_url, parent_type_prefix, labels) = match noteable_type.as_str() { "Issue" => { let parent_id = match issue_id { Some(pid) => pid, None => return Ok(None), }; let parent = conn.query_row( "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1", rusqlite::params![parent_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, Option>(1)?, row.get::<_, Option>(2)?, )) }, ); let (iid, title, web_url) = match parent { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM issue_labels il JOIN labels l ON l.id = il.label_id WHERE il.issue_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![parent_id], |row| row.get(0))? .collect::, _>>()?; (iid, title, web_url, format!("Issue #{}", iid), labels) } "MergeRequest" => { let parent_id = match merge_request_id { Some(pid) => pid, None => return Ok(None), }; let parent = conn.query_row( "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1", rusqlite::params![parent_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, Option>(1)?, row.get::<_, Option>(2)?, )) }, ); let (iid, title, web_url) = match parent { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM mr_labels ml JOIN labels l ON l.id = ml.label_id WHERE ml.merge_request_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![parent_id], |row| row.get(0))? .collect::, _>>()?; (iid, title, web_url, format!("MR !{}", iid), labels) } _ => return Ok(None), }; let mut note_stmt = conn.prepare_cached( "SELECT n.author_username, n.body, n.created_at, n.gitlab_id, n.note_type, n.position_old_path, n.position_new_path FROM notes n WHERE n.discussion_id = ?1 AND n.is_system = 0 ORDER BY n.created_at ASC, n.id ASC", )?; struct NoteRow { author: Option, body: Option, created_at: i64, gitlab_id: i64, old_path: Option, new_path: Option, } let notes: Vec = note_stmt .query_map(rusqlite::params![id], |row| { Ok(NoteRow { author: row.get(0)?, body: row.get(1)?, created_at: row.get(2)?, gitlab_id: row.get(3)?, old_path: row.get(5)?, new_path: row.get(6)?, }) })? .collect::, _>>()?; if notes.is_empty() { return Ok(None); } let mut path_set = BTreeSet::new(); for note in ¬es { if let Some(ref p) = note.old_path && !p.is_empty() { path_set.insert(p.clone()); } if let Some(ref p) = note.new_path && !p.is_empty() { path_set.insert(p.clone()); } } let paths: Vec = path_set.into_iter().collect(); let first_note_gitlab_id = notes[0].gitlab_id; let url = parent_web_url .as_ref() .map(|wu| format!("{}#note_{}", wu, first_note_gitlab_id)); let author_username = notes[0].author.clone(); let display_title = parent_title.as_deref().unwrap_or("(untitled)"); let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string()); let paths_json = serde_json::to_string(&paths).unwrap_or_else(|_| "[]".to_string()); let mut content = format!( "[[Discussion]] {}: {}\nProject: {}\n", parent_type_prefix, display_title, path_with_namespace ); if let Some(ref u) = url { let _ = writeln!(content, "URL: {}", u); } let _ = writeln!(content, "Labels: {}", labels_json); if !paths.is_empty() { let _ = writeln!(content, "Files: {}", paths_json); } let note_contents: Vec = notes .iter() .map(|note| NoteContent { author: note.author.as_deref().unwrap_or("unknown").to_string(), date: format_date(note.created_at), body: note.body.as_deref().unwrap_or("").to_string(), }) .collect(); let header_len = content.len() + "\n--- Thread ---\n\n".len(); let thread_budget = MAX_DISCUSSION_BYTES.saturating_sub(header_len); let thread_result = truncate_discussion(¬e_contents, thread_budget); content.push_str("\n--- Thread ---\n\n"); content.push_str(&thread_result.content); let created_at = notes[0].created_at; let updated_at = notes.last().map(|n| n.created_at).unwrap_or(created_at); let content_hash = compute_content_hash(&content); let labels_hash = compute_list_hash(&labels); let paths_hash = compute_list_hash(&paths); Ok(Some(DocumentData { source_type: SourceType::Discussion, source_id: id, project_id, author_username, labels, paths, labels_hash, paths_hash, created_at, updated_at, url, title: None, content_text: content, content_hash, is_truncated: thread_result.is_truncated, truncated_reason: thread_result.reason.map(|r| r.as_str().to_string()), })) } pub fn extract_note_document(conn: &Connection, note_id: i64) -> Result> { let row = conn.query_row( "SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system, n.created_at, n.updated_at, n.position_new_path, n.position_new_line, n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by, d.noteable_type, d.issue_id, d.merge_request_id, p.path_with_namespace, p.id AS project_id FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN projects p ON n.project_id = p.id WHERE n.id = ?1", rusqlite::params![note_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, i64>(1)?, row.get::<_, Option>(2)?, row.get::<_, Option>(3)?, row.get::<_, Option>(4)?, row.get::<_, bool>(5)?, row.get::<_, i64>(6)?, row.get::<_, i64>(7)?, row.get::<_, Option>(8)?, row.get::<_, Option>(9)?, row.get::<_, Option>(10)?, row.get::<_, Option>(11)?, row.get::<_, bool>(12)?, row.get::<_, bool>(13)?, row.get::<_, Option>(14)?, row.get::<_, String>(15)?, row.get::<_, Option>(16)?, row.get::<_, Option>(17)?, row.get::<_, String>(18)?, row.get::<_, i64>(19)?, )) }, ); let ( _id, gitlab_id, author_username, body, note_type, is_system, created_at, updated_at, position_new_path, position_new_line, position_old_path, _position_old_line, resolvable, resolved, _resolved_by, noteable_type, issue_id, merge_request_id, path_with_namespace, project_id, ) = match row { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; if is_system { return Ok(None); } let (parent_iid, parent_title, parent_web_url, parent_type_label, labels) = match noteable_type.as_str() { "Issue" => { let parent_id = match issue_id { Some(pid) => pid, None => return Ok(None), }; let parent = conn.query_row( "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1", rusqlite::params![parent_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, Option>(1)?, row.get::<_, Option>(2)?, )) }, ); let (iid, title, web_url) = match parent { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM issue_labels il JOIN labels l ON l.id = il.label_id WHERE il.issue_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![parent_id], |row| row.get(0))? .collect::, _>>()?; (iid, title, web_url, "Issue", labels) } "MergeRequest" => { let parent_id = match merge_request_id { Some(pid) => pid, None => return Ok(None), }; let parent = conn.query_row( "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1", rusqlite::params![parent_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, Option>(1)?, row.get::<_, Option>(2)?, )) }, ); let (iid, title, web_url) = match parent { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM mr_labels ml JOIN labels l ON l.id = ml.label_id WHERE ml.merge_request_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![parent_id], |row| row.get(0))? .collect::, _>>()?; (iid, title, web_url, "MergeRequest", labels) } _ => return Ok(None), }; build_note_document( note_id, gitlab_id, author_username, body, note_type, created_at, updated_at, position_new_path, position_new_line, position_old_path, resolvable, resolved, parent_iid, parent_title.as_deref(), parent_web_url.as_deref(), &labels, parent_type_label, &path_with_namespace, project_id, ) } pub struct ParentMetadata { pub iid: i64, pub title: Option, pub web_url: Option, pub labels: Vec, pub project_path: String, } pub struct ParentMetadataCache { cache: HashMap<(String, i64), Option>, } impl Default for ParentMetadataCache { fn default() -> Self { Self::new() } } impl ParentMetadataCache { pub fn new() -> Self { Self { cache: HashMap::new(), } } pub fn get_or_fetch( &mut self, conn: &Connection, noteable_type: &str, parent_id: i64, project_path: &str, ) -> Result> { let key = (noteable_type.to_string(), parent_id); if !self.cache.contains_key(&key) { let meta = fetch_parent_metadata(conn, noteable_type, parent_id, project_path)?; self.cache.insert(key.clone(), meta); } Ok(self.cache.get(&key).and_then(|m| m.as_ref())) } } fn fetch_parent_metadata( conn: &Connection, noteable_type: &str, parent_id: i64, project_path: &str, ) -> Result> { match noteable_type { "Issue" => { let parent = conn.query_row( "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1", rusqlite::params![parent_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, Option>(1)?, row.get::<_, Option>(2)?, )) }, ); let (iid, title, web_url) = match parent { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM issue_labels il JOIN labels l ON l.id = il.label_id WHERE il.issue_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![parent_id], |row| row.get(0))? .collect::, _>>()?; Ok(Some(ParentMetadata { iid, title, web_url, labels, project_path: project_path.to_string(), })) } "MergeRequest" => { let parent = conn.query_row( "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1", rusqlite::params![parent_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, Option>(1)?, row.get::<_, Option>(2)?, )) }, ); let (iid, title, web_url) = match parent { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; let mut label_stmt = conn.prepare_cached( "SELECT l.name FROM mr_labels ml JOIN labels l ON l.id = ml.label_id WHERE ml.merge_request_id = ?1 ORDER BY l.name", )?; let labels: Vec = label_stmt .query_map(rusqlite::params![parent_id], |row| row.get(0))? .collect::, _>>()?; Ok(Some(ParentMetadata { iid, title, web_url, labels, project_path: project_path.to_string(), })) } _ => Ok(None), } } pub fn extract_note_document_cached( conn: &Connection, note_id: i64, cache: &mut ParentMetadataCache, ) -> Result> { let row = conn.query_row( "SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system, n.created_at, n.updated_at, n.position_new_path, n.position_new_line, n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by, d.noteable_type, d.issue_id, d.merge_request_id, p.path_with_namespace, p.id AS project_id FROM notes n JOIN discussions d ON n.discussion_id = d.id JOIN projects p ON n.project_id = p.id WHERE n.id = ?1", rusqlite::params![note_id], |row| { Ok(( row.get::<_, i64>(0)?, row.get::<_, i64>(1)?, row.get::<_, Option>(2)?, row.get::<_, Option>(3)?, row.get::<_, Option>(4)?, row.get::<_, bool>(5)?, row.get::<_, i64>(6)?, row.get::<_, i64>(7)?, row.get::<_, Option>(8)?, row.get::<_, Option>(9)?, row.get::<_, Option>(10)?, row.get::<_, Option>(11)?, row.get::<_, bool>(12)?, row.get::<_, bool>(13)?, row.get::<_, Option>(14)?, row.get::<_, String>(15)?, row.get::<_, Option>(16)?, row.get::<_, Option>(17)?, row.get::<_, String>(18)?, row.get::<_, i64>(19)?, )) }, ); let ( _id, gitlab_id, author_username, body, note_type, is_system, created_at, updated_at, position_new_path, position_new_line, position_old_path, _position_old_line, resolvable, resolved, _resolved_by, noteable_type, issue_id, merge_request_id, path_with_namespace, project_id, ) = match row { Ok(r) => r, Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None), Err(e) => return Err(e.into()), }; if is_system { return Ok(None); } let parent_id = match noteable_type.as_str() { "Issue" => match issue_id { Some(pid) => pid, None => return Ok(None), }, "MergeRequest" => match merge_request_id { Some(pid) => pid, None => return Ok(None), }, _ => return Ok(None), }; let parent = cache.get_or_fetch(conn, ¬eable_type, parent_id, &path_with_namespace)?; let parent = match parent { Some(p) => p, None => return Ok(None), }; let parent_iid = parent.iid; let parent_title = parent.title.as_deref(); let parent_web_url = parent.web_url.as_deref(); let labels = parent.labels.clone(); let parent_type_label = noteable_type.as_str(); build_note_document( note_id, gitlab_id, author_username, body, note_type, created_at, updated_at, position_new_path, position_new_line, position_old_path, resolvable, resolved, parent_iid, parent_title, parent_web_url, &labels, parent_type_label, &path_with_namespace, project_id, ) } #[allow(clippy::too_many_arguments)] fn build_note_document( note_id: i64, gitlab_id: i64, author_username: Option, body: Option, note_type: Option, created_at: i64, updated_at: i64, position_new_path: Option, position_new_line: Option, position_old_path: Option, resolvable: bool, resolved: bool, parent_iid: i64, parent_title: Option<&str>, parent_web_url: Option<&str>, labels: &[String], parent_type_label: &str, path_with_namespace: &str, project_id: i64, ) -> Result> { let mut path_set = BTreeSet::new(); if let Some(ref p) = position_old_path && !p.is_empty() { path_set.insert(p.clone()); } if let Some(ref p) = position_new_path && !p.is_empty() { path_set.insert(p.clone()); } let paths: Vec = path_set.into_iter().collect(); let url = parent_web_url.map(|wu| format!("{}#note_{}", wu, gitlab_id)); let display_title = parent_title.unwrap_or("(untitled)"); let display_note_type = note_type.as_deref().unwrap_or("Note"); let display_author = author_username.as_deref().unwrap_or("unknown"); let parent_prefix = if parent_type_label == "Issue" { format!("Issue #{}", parent_iid) } else { format!("MR !{}", parent_iid) }; let title = format!( "Note by @{} on {}: {}", display_author, parent_prefix, display_title ); let labels_csv = labels.join(", "); let mut content = String::new(); let _ = writeln!(content, "[[Note]]"); let _ = writeln!(content, "source_type: note"); let _ = writeln!(content, "note_gitlab_id: {}", gitlab_id); let _ = writeln!(content, "project: {}", path_with_namespace); let _ = writeln!(content, "parent_type: {}", parent_type_label); let _ = writeln!(content, "parent_iid: {}", parent_iid); let _ = writeln!(content, "parent_title: {}", display_title); let _ = writeln!(content, "note_type: {}", display_note_type); let _ = writeln!(content, "author: @{}", display_author); let _ = writeln!(content, "created_at: {}", ms_to_iso(created_at)); if resolvable { let _ = writeln!(content, "resolved: {}", resolved); } if display_note_type == "DiffNote" && let Some(ref p) = position_new_path { if let Some(line) = position_new_line { let _ = writeln!(content, "path: {}:{}", p, line); } else { let _ = writeln!(content, "path: {}", p); } } if !labels.is_empty() { let _ = writeln!(content, "labels: {}", labels_csv); } if let Some(ref u) = url { let _ = writeln!(content, "url: {}", u); } content.push_str("\n--- Body ---\n\n"); content.push_str(body.as_deref().unwrap_or("")); let labels_hash = compute_list_hash(labels); let paths_hash = compute_list_hash(&paths); let hard_cap = truncate_hard_cap(&content); let content_hash = compute_content_hash(&hard_cap.content); Ok(Some(DocumentData { source_type: SourceType::Note, source_id: note_id, project_id, author_username, labels: labels.to_vec(), paths, labels_hash, paths_hash, created_at, updated_at, url, title: Some(title), content_text: hard_cap.content, content_hash, is_truncated: hard_cap.is_truncated, truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()), })) } #[cfg(test)] #[path = "extractor_tests.rs"] mod tests;