Files
gitlore/src/documents/extractor.rs
teernisse 83cd16c918 feat: implement per-note search and document pipeline
- Add SourceType::Note with extract_note_document() and ParentMetadataCache
- Migration 022: composite indexes for notes queries + author_id column
- Migration 024: table rebuild adding 'note' to CHECK constraints, defense triggers
- Migration 025: backfill existing non-system notes into dirty queue
- Add lore notes CLI command with 17 filter options (author, path, resolution, etc.)
- Support table/json/jsonl/csv output formats with field selection
- Wire note dirty tracking through discussion and MR discussion ingestion
- Fix test_migration_024_preserves_existing_data off-by-one (tested wrong migration)
- Fix upsert_document_inner returning false for label/path-only changes
2026-02-12 13:31:24 -05:00

2342 lines
72 KiB
Rust

use chrono::DateTime;
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::{BTreeSet, HashMap};
use std::fmt::Write as _;
use super::truncation::{
MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
};
use crate::core::error::Result;
use crate::core::time::ms_to_iso;
/// Kind of entity a search document is generated from.
///
/// Serialized by serde using `snake_case` variant names (see the
/// `rename_all` attribute below).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SourceType {
/// Document built from an issue row.
Issue,
/// Document built from a merge request row.
MergeRequest,
/// Document built from a discussion and its non-system notes.
Discussion,
/// Document built from a single non-system note.
Note,
}
impl SourceType {
pub fn as_str(&self) -> &'static str {
match self {
Self::Issue => "issue",
Self::MergeRequest => "merge_request",
Self::Discussion => "discussion",
Self::Note => "note",
}
}
pub fn parse(s: &str) -> Option<Self> {
match s.to_lowercase().as_str() {
"issue" | "issues" => Some(Self::Issue),
"mr" | "mrs" | "merge_request" | "merge_requests" => Some(Self::MergeRequest),
"discussion" | "discussions" => Some(Self::Discussion),
"note" | "notes" => Some(Self::Note),
_ => None,
}
}
}
/// Displays the source type as its canonical identifier (same text as
/// [`SourceType::as_str`]).
impl std::fmt::Display for SourceType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Written verbatim; formatter width/padding flags are not applied.
        f.write_str(self.as_str())
    }
}
/// Extracted representation of one source entity (issue, merge request,
/// discussion or note) as produced by the `extract_*` functions in this file.
#[derive(Debug, Clone)]
pub struct DocumentData {
/// Which kind of entity this document was generated from.
pub source_type: SourceType,
/// Local row id of the source entity.
pub source_id: i64,
/// Row id of the owning project (`projects.id`).
pub project_id: i64,
/// Author of the entity; for discussions, the author of the first note.
pub author_username: Option<String>,
/// Label names of the entity, or of its parent issue/MR for discussions and notes.
pub labels: Vec<String>,
/// File paths taken from note positions; empty for issues and merge requests.
pub paths: Vec<String>,
/// `compute_list_hash` of `labels`.
pub labels_hash: String,
/// `compute_list_hash` of `paths`.
pub paths_hash: String,
/// Creation timestamp copied from the source row
/// (presumably epoch milliseconds — TODO confirm against the schema).
pub created_at: i64,
/// Last-update timestamp; for discussions this is the `created_at` of the last note.
pub updated_at: i64,
/// Web URL of the entity, when the source (or parent) row has one.
pub url: Option<String>,
/// Display title; `None` for discussion documents.
pub title: Option<String>,
/// Rendered document text: header fields followed by the description, thread or note body.
pub content_text: String,
/// `compute_content_hash` of `content_text`.
pub content_hash: String,
/// Whether the truncation helper reported that the content was truncated.
pub is_truncated: bool,
/// String form of the reason reported by the truncation helper, if any.
pub truncated_reason: Option<String>,
}
/// Returns the SHA-256 digest of `content` as a 64-character lowercase hex string.
pub fn compute_content_hash(content: &str) -> String {
    let digest = Sha256::digest(content.as_bytes());
    format!("{:x}", digest)
}
/// Returns an order-independent SHA-256 hex digest of a list of strings.
///
/// The items are sorted, joined with a single `\n` separator, and hashed, so
/// two lists with the same elements in a different order hash identically.
pub fn compute_list_hash(items: &[String]) -> String {
    let mut sorted: Vec<&str> = items.iter().map(String::as_str).collect();
    sorted.sort();

    let mut hasher = Sha256::new();
    let mut remaining = sorted.into_iter();
    if let Some(first) = remaining.next() {
        hasher.update(first.as_bytes());
        // Every subsequent item is preceded by the separator.
        for item in remaining {
            hasher.update(b"\n");
            hasher.update(item.as_bytes());
        }
    }
    format!("{:x}", hasher.finalize())
}
/// Builds the search document for the issue with local row id `issue_id`.
///
/// The document text is a header (`[[Issue]] #iid: title`, project, optional
/// URL, labels as a JSON array, state, optional author) followed by the
/// description, when one is present. The text is passed through
/// `truncate_hard_cap` before being hashed and stored.
///
/// Returns `Ok(None)` when no such issue exists; other database errors are
/// propagated.
pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option<DocumentData>> {
    // Named view of the selected columns, in SELECT-list order.
    struct IssueRow {
        id: i64,
        iid: i64,
        title: Option<String>,
        description: Option<String>,
        state: String,
        author_username: Option<String>,
        created_at: i64,
        updated_at: i64,
        web_url: Option<String>,
        project_path: String,
        project_id: i64,
    }

    let lookup = conn.query_row(
        "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,
i.created_at, i.updated_at, i.web_url,
p.path_with_namespace, p.id AS project_id
FROM issues i
JOIN projects p ON p.id = i.project_id
WHERE i.id = ?1",
        rusqlite::params![issue_id],
        |row| {
            Ok(IssueRow {
                id: row.get(0)?,
                iid: row.get(1)?,
                title: row.get(2)?,
                description: row.get(3)?,
                state: row.get(4)?,
                author_username: row.get(5)?,
                created_at: row.get(6)?,
                updated_at: row.get(7)?,
                web_url: row.get(8)?,
                project_path: row.get(9)?,
                project_id: row.get(10)?,
            })
        },
    );
    let issue = match lookup {
        Ok(found) => found,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(other) => return Err(other.into()),
    };

    let mut labels_query = conn.prepare_cached(
        "SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
    )?;
    let labels: Vec<String> = labels_query
        .query_map(rusqlite::params![issue.id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    let display_title = issue.title.as_deref().unwrap_or("(untitled)");
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());

    // Header lines, one field per line.
    let mut content = String::new();
    let _ = writeln!(content, "[[Issue]] #{}: {}", issue.iid, display_title);
    let _ = writeln!(content, "Project: {}", issue.project_path);
    if let Some(url) = issue.web_url.as_deref() {
        let _ = writeln!(content, "URL: {}", url);
    }
    let _ = writeln!(content, "Labels: {}", labels_json);
    let _ = writeln!(content, "State: {}", issue.state);
    if let Some(author) = issue.author_username.as_deref() {
        let _ = writeln!(content, "Author: @{}", author);
    }
    if let Some(description) = issue.description.as_deref() {
        content.push_str("\n--- Description ---\n\n");
        content.push_str(description);
    }

    let labels_hash = compute_list_hash(&labels);
    // Issues never carry file paths.
    let paths_hash = compute_list_hash(&[]);
    let capped = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&capped.content);
    let title = Some(display_title.to_string());

    Ok(Some(DocumentData {
        source_type: SourceType::Issue,
        source_id: issue.id,
        project_id: issue.project_id,
        author_username: issue.author_username,
        labels,
        paths: Vec::new(),
        labels_hash,
        paths_hash,
        created_at: issue.created_at,
        updated_at: issue.updated_at,
        url: issue.web_url,
        title,
        content_text: capped.content,
        content_hash,
        is_truncated: capped.is_truncated,
        truncated_reason: capped.reason.map(|r| r.as_str().to_string()),
    }))
}
/// Builds the search document for the merge request with local row id `mr_id`.
///
/// The document text is a header (`[[MergeRequest]] !iid: title`, project,
/// optional URL, labels as a JSON array, state, optional author, and a
/// `Source: src -> tgt` line when both branches are known) followed by the
/// description, when one is present. The text is passed through
/// `truncate_hard_cap` before being hashed and stored.
///
/// Missing state is rendered as `unknown`; missing timestamps are stored as 0.
/// Returns `Ok(None)` when no such merge request exists.
pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<DocumentData>> {
    // Named view of the selected columns, in SELECT-list order.
    struct MrRow {
        id: i64,
        iid: i64,
        title: Option<String>,
        description: Option<String>,
        state: Option<String>,
        author_username: Option<String>,
        source_branch: Option<String>,
        target_branch: Option<String>,
        created_at: Option<i64>,
        updated_at: Option<i64>,
        web_url: Option<String>,
        project_path: String,
        project_id: i64,
    }

    let lookup = conn.query_row(
        "SELECT m.id, m.iid, m.title, m.description, m.state, m.author_username,
m.source_branch, m.target_branch,
m.created_at, m.updated_at, m.web_url,
p.path_with_namespace, p.id AS project_id
FROM merge_requests m
JOIN projects p ON p.id = m.project_id
WHERE m.id = ?1",
        rusqlite::params![mr_id],
        |row| {
            Ok(MrRow {
                id: row.get(0)?,
                iid: row.get(1)?,
                title: row.get(2)?,
                description: row.get(3)?,
                state: row.get(4)?,
                author_username: row.get(5)?,
                source_branch: row.get(6)?,
                target_branch: row.get(7)?,
                created_at: row.get(8)?,
                updated_at: row.get(9)?,
                web_url: row.get(10)?,
                project_path: row.get(11)?,
                project_id: row.get(12)?,
            })
        },
    );
    let mr = match lookup {
        Ok(found) => found,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(other) => return Err(other.into()),
    };

    let mut labels_query = conn.prepare_cached(
        "SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
    )?;
    let labels: Vec<String> = labels_query
        .query_map(rusqlite::params![mr.id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    let display_title = mr.title.as_deref().unwrap_or("(untitled)");
    let display_state = mr.state.as_deref().unwrap_or("unknown");
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());

    // Header lines, one field per line.
    let mut content = String::new();
    let _ = writeln!(content, "[[MergeRequest]] !{}: {}", mr.iid, display_title);
    let _ = writeln!(content, "Project: {}", mr.project_path);
    if let Some(url) = mr.web_url.as_deref() {
        let _ = writeln!(content, "URL: {}", url);
    }
    let _ = writeln!(content, "Labels: {}", labels_json);
    let _ = writeln!(content, "State: {}", display_state);
    if let Some(author) = mr.author_username.as_deref() {
        let _ = writeln!(content, "Author: @{}", author);
    }
    // The branch line is only emitted when both ends are known.
    if let (Some(src), Some(tgt)) = (mr.source_branch.as_deref(), mr.target_branch.as_deref()) {
        let _ = writeln!(content, "Source: {} -> {}", src, tgt);
    }
    if let Some(description) = mr.description.as_deref() {
        content.push_str("\n--- Description ---\n\n");
        content.push_str(description);
    }

    let labels_hash = compute_list_hash(&labels);
    // Merge request documents never carry file paths.
    let paths_hash = compute_list_hash(&[]);
    let capped = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&capped.content);
    let title = Some(display_title.to_string());

    Ok(Some(DocumentData {
        source_type: SourceType::MergeRequest,
        source_id: mr.id,
        project_id: mr.project_id,
        author_username: mr.author_username,
        labels,
        paths: Vec::new(),
        labels_hash,
        paths_hash,
        created_at: mr.created_at.unwrap_or(0),
        updated_at: mr.updated_at.unwrap_or(0),
        url: mr.web_url,
        title,
        content_text: capped.content,
        content_hash,
        is_truncated: capped.is_truncated,
        truncated_reason: capped.reason.map(|r| r.as_str().to_string()),
    }))
}
/// Formats a millisecond Unix timestamp as `YYYY-MM-DD`, or `"unknown"` when
/// the value cannot be represented as a date-time.
fn format_date(ms: i64) -> String {
    match DateTime::from_timestamp_millis(ms) {
        Some(moment) => moment.format("%Y-%m-%d").to_string(),
        None => "unknown".to_string(),
    }
}
pub fn extract_discussion_document(
conn: &Connection,
discussion_id: i64,
) -> Result<Option<DocumentData>> {
let disc_row = conn.query_row(
"SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM discussions d
JOIN projects p ON p.id = d.project_id
WHERE d.id = ?1",
rusqlite::params![discussion_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, String>(1)?,
row.get::<_, Option<i64>>(2)?,
row.get::<_, Option<i64>>(3)?,
row.get::<_, String>(4)?,
row.get::<_, i64>(5)?,
))
},
);
let (id, noteable_type, issue_id, merge_request_id, path_with_namespace, project_id) =
match disc_row {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let (_parent_iid, parent_title, parent_web_url, parent_type_prefix, labels) =
match noteable_type.as_str() {
"Issue" => {
let parent_id = match issue_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, format!("Issue #{}", iid), labels)
}
"MergeRequest" => {
let parent_id = match merge_request_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, format!("MR !{}", iid), labels)
}
_ => return Ok(None),
};
let mut note_stmt = conn.prepare_cached(
"SELECT n.author_username, n.body, n.created_at, n.gitlab_id,
n.note_type, n.position_old_path, n.position_new_path
FROM notes n
WHERE n.discussion_id = ?1 AND n.is_system = 0
ORDER BY n.created_at ASC, n.id ASC",
)?;
struct NoteRow {
author: Option<String>,
body: Option<String>,
created_at: i64,
gitlab_id: i64,
old_path: Option<String>,
new_path: Option<String>,
}
let notes: Vec<NoteRow> = note_stmt
.query_map(rusqlite::params![id], |row| {
Ok(NoteRow {
author: row.get(0)?,
body: row.get(1)?,
created_at: row.get(2)?,
gitlab_id: row.get(3)?,
old_path: row.get(5)?,
new_path: row.get(6)?,
})
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
if notes.is_empty() {
return Ok(None);
}
let mut path_set = BTreeSet::new();
for note in &notes {
if let Some(ref p) = note.old_path
&& !p.is_empty()
{
path_set.insert(p.clone());
}
if let Some(ref p) = note.new_path
&& !p.is_empty()
{
path_set.insert(p.clone());
}
}
let paths: Vec<String> = path_set.into_iter().collect();
let first_note_gitlab_id = notes[0].gitlab_id;
let url = parent_web_url
.as_ref()
.map(|wu| format!("{}#note_{}", wu, first_note_gitlab_id));
let author_username = notes[0].author.clone();
let display_title = parent_title.as_deref().unwrap_or("(untitled)");
let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
let paths_json = serde_json::to_string(&paths).unwrap_or_else(|_| "[]".to_string());
let mut content = format!(
"[[Discussion]] {}: {}\nProject: {}\n",
parent_type_prefix, display_title, path_with_namespace
);
if let Some(ref u) = url {
let _ = writeln!(content, "URL: {}", u);
}
let _ = writeln!(content, "Labels: {}", labels_json);
if !paths.is_empty() {
let _ = writeln!(content, "Files: {}", paths_json);
}
let note_contents: Vec<NoteContent> = notes
.iter()
.map(|note| NoteContent {
author: note.author.as_deref().unwrap_or("unknown").to_string(),
date: format_date(note.created_at),
body: note.body.as_deref().unwrap_or("").to_string(),
})
.collect();
let header_len = content.len() + "\n--- Thread ---\n\n".len();
let thread_budget = MAX_DISCUSSION_BYTES.saturating_sub(header_len);
let thread_result = truncate_discussion(&note_contents, thread_budget);
content.push_str("\n--- Thread ---\n\n");
content.push_str(&thread_result.content);
let created_at = notes[0].created_at;
let updated_at = notes.last().map(|n| n.created_at).unwrap_or(created_at);
let content_hash = compute_content_hash(&content);
let labels_hash = compute_list_hash(&labels);
let paths_hash = compute_list_hash(&paths);
Ok(Some(DocumentData {
source_type: SourceType::Discussion,
source_id: id,
project_id,
author_username,
labels,
paths,
labels_hash,
paths_hash,
created_at,
updated_at,
url,
title: None,
content_text: content,
content_hash,
is_truncated: thread_result.is_truncated,
truncated_reason: thread_result.reason.map(|r| r.as_str().to_string()),
}))
}
pub fn extract_note_document(conn: &Connection, note_id: i64) -> Result<Option<DocumentData>> {
let row = conn.query_row(
"SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
WHERE n.id = ?1",
rusqlite::params![note_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, Option<String>>(3)?,
row.get::<_, Option<String>>(4)?,
row.get::<_, bool>(5)?,
row.get::<_, i64>(6)?,
row.get::<_, i64>(7)?,
row.get::<_, Option<String>>(8)?,
row.get::<_, Option<i64>>(9)?,
row.get::<_, Option<String>>(10)?,
row.get::<_, Option<i64>>(11)?,
row.get::<_, bool>(12)?,
row.get::<_, bool>(13)?,
row.get::<_, Option<String>>(14)?,
row.get::<_, String>(15)?,
row.get::<_, Option<i64>>(16)?,
row.get::<_, Option<i64>>(17)?,
row.get::<_, String>(18)?,
row.get::<_, i64>(19)?,
))
},
);
let (
_id,
gitlab_id,
author_username,
body,
note_type,
is_system,
created_at,
updated_at,
position_new_path,
position_new_line,
position_old_path,
_position_old_line,
resolvable,
resolved,
_resolved_by,
noteable_type,
issue_id,
merge_request_id,
path_with_namespace,
project_id,
) = match row {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
if is_system {
return Ok(None);
}
let (parent_iid, parent_title, parent_web_url, parent_type_label, labels) =
match noteable_type.as_str() {
"Issue" => {
let parent_id = match issue_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, "Issue", labels)
}
"MergeRequest" => {
let parent_id = match merge_request_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, "MergeRequest", labels)
}
_ => return Ok(None),
};
build_note_document(
note_id,
gitlab_id,
author_username,
body,
note_type,
created_at,
updated_at,
position_new_path,
position_new_line,
position_old_path,
resolvable,
resolved,
parent_iid,
parent_title.as_deref(),
parent_web_url.as_deref(),
&labels,
parent_type_label,
&path_with_namespace,
project_id,
)
}
/// Metadata of the issue or merge request that a note belongs to, as loaded
/// by `fetch_parent_metadata`.
pub struct ParentMetadata {
/// Project-scoped `iid` of the parent issue or merge request.
pub iid: i64,
/// Parent title, if the row has one.
pub title: Option<String>,
/// Parent web URL, if the row has one.
pub web_url: Option<String>,
/// Label names attached to the parent, ordered by name.
pub labels: Vec<String>,
/// Project path supplied by the caller when the metadata was fetched.
pub project_path: String,
}
/// In-memory cache of parent metadata lookups, used by
/// `extract_note_document_cached`.
pub struct ParentMetadataCache {
// Keyed by (noteable type, parent row id); a `None` value records that the
// lookup ran and found no parent.
cache: HashMap<(String, i64), Option<ParentMetadata>>,
}
impl Default for ParentMetadataCache {
fn default() -> Self {
Self::new()
}
}
impl ParentMetadataCache {
    /// Creates an empty cache.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Returns the metadata for the given parent, querying the database only
    /// on the first request for a `(noteable_type, parent_id)` pair.
    ///
    /// Both hits and misses are cached: when the parent does not exist (or the
    /// noteable type is not recognised) `Ok(None)` is remembered and returned
    /// on later calls without another query. If the lookup itself fails the
    /// error is propagated and nothing is cached.
    pub fn get_or_fetch(
        &mut self,
        conn: &Connection,
        noteable_type: &str,
        parent_id: i64,
        project_path: &str,
    ) -> Result<Option<&ParentMetadata>> {
        // A single entry lookup replaces contains_key + insert + get.
        let slot: &Option<ParentMetadata> =
            match self.cache.entry((noteable_type.to_string(), parent_id)) {
                std::collections::hash_map::Entry::Occupied(hit) => hit.into_mut(),
                std::collections::hash_map::Entry::Vacant(miss) => miss.insert(
                    fetch_parent_metadata(conn, noteable_type, parent_id, project_path)?,
                ),
            };
        Ok(slot.as_ref())
    }
}
/// Loads iid, title, web URL and name-ordered labels of a note's parent.
///
/// `noteable_type` selects the tables to query: `"Issue"` reads `issues` /
/// `issue_labels`, `"MergeRequest"` reads `merge_requests` / `mr_labels`.
/// `project_path` is copied into the result unchanged.
///
/// Returns `Ok(None)` for any other noteable type or when no parent row with
/// `parent_id` exists.
fn fetch_parent_metadata(
    conn: &Connection,
    noteable_type: &str,
    parent_id: i64,
    project_path: &str,
) -> Result<Option<ParentMetadata>> {
    // The two parent kinds differ only in the SQL they run.
    let (parent_sql, labels_sql) = match noteable_type {
        "Issue" => (
            "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
            "SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
        ),
        "MergeRequest" => (
            "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
            "SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
        ),
        _ => return Ok(None),
    };

    let lookup = conn.query_row(parent_sql, rusqlite::params![parent_id], |row| {
        Ok((
            row.get::<_, i64>(0)?,
            row.get::<_, Option<String>>(1)?,
            row.get::<_, Option<String>>(2)?,
        ))
    });
    let (iid, title, web_url) = match lookup {
        Ok(found) => found,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(other) => return Err(other.into()),
    };

    let mut labels_query = conn.prepare_cached(labels_sql)?;
    let labels: Vec<String> = labels_query
        .query_map(rusqlite::params![parent_id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    Ok(Some(ParentMetadata {
        iid,
        title,
        web_url,
        labels,
        project_path: project_path.to_string(),
    }))
}
/// Builds the search document for the note with local row id `note_id`,
/// resolving the parent issue or merge request through `cache` so that notes
/// sharing a parent only trigger one parent lookup.
///
/// Returns `Ok(None)` when the note does not exist, is a system note, its
/// discussion's noteable type is neither `Issue` nor `MergeRequest`, the
/// matching parent id is missing, or the parent cannot be found.
pub fn extract_note_document_cached(
    conn: &Connection,
    note_id: i64,
    cache: &mut ParentMetadataCache,
) -> Result<Option<DocumentData>> {
    let row = conn.query_row(
        "SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
WHERE n.id = ?1",
        rusqlite::params![note_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, Option<String>>(2)?,
                row.get::<_, Option<String>>(3)?,
                row.get::<_, Option<String>>(4)?,
                row.get::<_, bool>(5)?,
                row.get::<_, i64>(6)?,
                row.get::<_, i64>(7)?,
                row.get::<_, Option<String>>(8)?,
                row.get::<_, Option<i64>>(9)?,
                row.get::<_, Option<String>>(10)?,
                row.get::<_, Option<i64>>(11)?,
                row.get::<_, bool>(12)?,
                row.get::<_, bool>(13)?,
                row.get::<_, Option<String>>(14)?,
                row.get::<_, String>(15)?,
                row.get::<_, Option<i64>>(16)?,
                row.get::<_, Option<i64>>(17)?,
                row.get::<_, String>(18)?,
                row.get::<_, i64>(19)?,
            ))
        },
    );
    let (
        _id,
        gitlab_id,
        author_username,
        body,
        note_type,
        is_system,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        _position_old_line,
        resolvable,
        resolved,
        _resolved_by,
        noteable_type,
        issue_id,
        merge_request_id,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };

    // System notes are never indexed.
    if is_system {
        return Ok(None);
    }

    // Pick the parent id that matches the discussion's noteable type.
    let parent_id = match (noteable_type.as_str(), issue_id, merge_request_id) {
        ("Issue", Some(pid), _) => pid,
        ("MergeRequest", _, Some(pid)) => pid,
        _ => return Ok(None),
    };

    let Some(parent) =
        cache.get_or_fetch(conn, &noteable_type, parent_id, &path_with_namespace)?
    else {
        return Ok(None);
    };

    // Everything is borrowed straight from the cached entry; the builder
    // makes its own copy of the labels, so no clone is needed here.
    build_note_document(
        note_id,
        gitlab_id,
        author_username,
        body,
        note_type,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        resolvable,
        resolved,
        parent.iid,
        parent.title.as_deref(),
        parent.web_url.as_deref(),
        &parent.labels,
        noteable_type.as_str(),
        &path_with_namespace,
        project_id,
    )
}
/// Renders a single note plus its parent metadata into a [`DocumentData`].
///
/// The document text is a `[[Note]]` header of `key: value` lines followed by
/// a `--- Body ---` section containing the note body (empty when the body is
/// missing). Optional header lines:
/// - `resolved:` only for resolvable notes,
/// - `path:` only for `DiffNote`s that have a new-side path (with `:line`
///   appended when the new-side line is known),
/// - `labels:` only when the parent has labels,
/// - `url:` only when the parent has a web URL.
///
/// Any `parent_type_label` other than `"Issue"` is rendered with the `MR !`
/// prefix in the title. The text is passed through `truncate_hard_cap` before
/// being hashed and stored. This function always returns `Ok(Some(_))`.
#[allow(clippy::too_many_arguments)]
fn build_note_document(
    note_id: i64,
    gitlab_id: i64,
    author_username: Option<String>,
    body: Option<String>,
    note_type: Option<String>,
    created_at: i64,
    updated_at: i64,
    position_new_path: Option<String>,
    position_new_line: Option<i64>,
    position_old_path: Option<String>,
    resolvable: bool,
    resolved: bool,
    parent_iid: i64,
    parent_title: Option<&str>,
    parent_web_url: Option<&str>,
    labels: &[String],
    parent_type_label: &str,
    path_with_namespace: &str,
    project_id: i64,
) -> Result<Option<DocumentData>> {
    // Unique, sorted, non-empty position paths (old side and new side).
    let mut unique_paths = BTreeSet::new();
    for candidate in [&position_old_path, &position_new_path] {
        if let Some(path) = candidate.as_deref().filter(|p| !p.is_empty()) {
            unique_paths.insert(path.to_owned());
        }
    }
    let paths: Vec<String> = unique_paths.into_iter().collect();

    let url = parent_web_url.map(|base| format!("{}#note_{}", base, gitlab_id));
    let display_title = parent_title.unwrap_or("(untitled)");
    let display_note_type = note_type.as_deref().unwrap_or("Note");
    let display_author = author_username.as_deref().unwrap_or("unknown");

    let parent_prefix = match parent_type_label {
        "Issue" => format!("Issue #{}", parent_iid),
        _ => format!("MR !{}", parent_iid),
    };
    let title = format!(
        "Note by @{} on {}: {}",
        display_author, parent_prefix, display_title
    );

    let mut content = String::from("[[Note]]\nsource_type: note\n");
    let _ = writeln!(content, "note_gitlab_id: {}", gitlab_id);
    let _ = writeln!(content, "project: {}", path_with_namespace);
    let _ = writeln!(content, "parent_type: {}", parent_type_label);
    let _ = writeln!(content, "parent_iid: {}", parent_iid);
    let _ = writeln!(content, "parent_title: {}", display_title);
    let _ = writeln!(content, "note_type: {}", display_note_type);
    let _ = writeln!(content, "author: @{}", display_author);
    let _ = writeln!(content, "created_at: {}", ms_to_iso(created_at));
    if resolvable {
        let _ = writeln!(content, "resolved: {}", resolved);
    }
    if display_note_type == "DiffNote" {
        if let Some(new_path) = position_new_path.as_deref() {
            let _ = match position_new_line {
                Some(line) => writeln!(content, "path: {}:{}", new_path, line),
                None => writeln!(content, "path: {}", new_path),
            };
        }
    }
    if !labels.is_empty() {
        let _ = writeln!(content, "labels: {}", labels.join(", "));
    }
    if let Some(link) = url.as_deref() {
        let _ = writeln!(content, "url: {}", link);
    }
    content.push_str("\n--- Body ---\n\n");
    if let Some(text) = body.as_deref() {
        content.push_str(text);
    }

    let labels_hash = compute_list_hash(labels);
    let paths_hash = compute_list_hash(&paths);
    let capped = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&capped.content);

    Ok(Some(DocumentData {
        source_type: SourceType::Note,
        source_id: note_id,
        project_id,
        author_username,
        labels: labels.to_vec(),
        paths,
        labels_hash,
        paths_hash,
        created_at,
        updated_at,
        url,
        title: Some(title),
        content_text: capped.content,
        content_hash,
        is_truncated: capped.is_truncated,
        truncated_reason: capped.reason.map(|r| r.as_str().to_string()),
    }))
}
#[cfg(test)]
mod tests {
use super::*;
// Every accepted alias maps to its source type; unknown input yields None and
// matching ignores case.
#[test]
fn test_source_type_parse_aliases() {
    let cases = [
        ("issue", Some(SourceType::Issue)),
        ("issues", Some(SourceType::Issue)),
        ("mr", Some(SourceType::MergeRequest)),
        ("mrs", Some(SourceType::MergeRequest)),
        ("merge_request", Some(SourceType::MergeRequest)),
        ("merge_requests", Some(SourceType::MergeRequest)),
        ("discussion", Some(SourceType::Discussion)),
        ("discussions", Some(SourceType::Discussion)),
        ("invalid", None),
        ("ISSUE", Some(SourceType::Issue)),
    ];
    for (input, expected) in cases {
        assert_eq!(SourceType::parse(input), expected);
    }
}
// "note" parses to the Note variant.
#[test]
fn test_source_type_parse_note() {
    let parsed = SourceType::parse("note");
    assert_eq!(parsed, Some(SourceType::Note));
}
// The Note variant's canonical identifier is "note".
#[test]
fn test_source_type_note_as_str() {
    let name = SourceType::Note.as_str();
    assert_eq!(name, "note");
}
// Display for the Note variant renders "note".
#[test]
fn test_source_type_note_display() {
    let rendered = SourceType::Note.to_string();
    assert_eq!(rendered, "note");
}
// The plural alias "notes" also parses to the Note variant.
#[test]
fn test_source_type_parse_notes_alias() {
    let parsed = SourceType::parse("notes");
    assert_eq!(parsed, Some(SourceType::Note));
}
// Canonical identifiers of the issue, merge request and discussion variants.
#[test]
fn test_source_type_as_str() {
    let expectations = [
        (SourceType::Issue, "issue"),
        (SourceType::MergeRequest, "merge_request"),
        (SourceType::Discussion, "discussion"),
    ];
    for (variant, expected) in expectations {
        assert_eq!(variant.as_str(), expected);
    }
}
// Display output of the issue, merge request and discussion variants.
#[test]
fn test_source_type_display() {
    let expectations = [
        (SourceType::Issue, "issue"),
        (SourceType::MergeRequest, "merge_request"),
        (SourceType::Discussion, "discussion"),
    ];
    for (variant, expected) in expectations {
        assert_eq!(variant.to_string(), expected);
    }
}
// Hashing the same input twice gives the same non-empty 64-character digest.
#[test]
fn test_content_hash_deterministic() {
    let first = compute_content_hash("hello");
    let second = compute_content_hash("hello");
    assert_eq!(first, second);
    assert!(!first.is_empty());
    assert_eq!(first.len(), 64);
}
// Different inputs produce different digests.
#[test]
fn test_content_hash_different_inputs() {
    let hello = compute_content_hash("hello");
    let world = compute_content_hash("world");
    assert_ne!(hello, world);
}
// The empty string still hashes to a 64-character digest.
#[test]
fn test_content_hash_empty() {
    let digest = compute_content_hash("");
    assert_eq!(digest.len(), 64);
}
// The list hash does not depend on the order of the items.
#[test]
fn test_list_hash_order_independent() {
    let descending = vec!["b".to_string(), "a".to_string()];
    let ascending = vec!["a".to_string(), "b".to_string()];
    assert_eq!(
        compute_list_hash(&descending),
        compute_list_hash(&ascending)
    );
}
// An empty list hashes to a stable 64-character digest.
#[test]
fn test_list_hash_empty() {
    let first = compute_list_hash(&[]);
    assert_eq!(first.len(), 64);
    let second = compute_list_hash(&[]);
    assert_eq!(first, second);
}
// Creates an in-memory SQLite database with the projects, issues, labels and
// issue_labels tables, seeded with a single project (id 1).
fn setup_test_db() -> Connection {
    const SCHEMA: &str = "
CREATE TABLE projects (
id INTEGER PRIMARY KEY,
gitlab_project_id INTEGER UNIQUE NOT NULL,
path_with_namespace TEXT NOT NULL,
default_branch TEXT,
web_url TEXT,
created_at INTEGER,
updated_at INTEGER,
raw_payload_id INTEGER
);
CREATE TABLE issues (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
iid INTEGER NOT NULL,
title TEXT,
description TEXT,
state TEXT NOT NULL,
author_username TEXT,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
last_seen_at INTEGER NOT NULL,
discussions_synced_for_updated_at INTEGER,
resource_events_synced_for_updated_at INTEGER,
web_url TEXT,
raw_payload_id INTEGER
);
CREATE TABLE labels (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER,
project_id INTEGER NOT NULL REFERENCES projects(id),
name TEXT NOT NULL,
color TEXT,
description TEXT
);
CREATE TABLE issue_labels (
issue_id INTEGER NOT NULL REFERENCES issues(id),
label_id INTEGER NOT NULL REFERENCES labels(id),
PRIMARY KEY(issue_id, label_id)
);
";
    const SEED_PROJECT: &str = "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project-one', 'https://gitlab.example.com/group/project-one')";

    let db = Connection::open_in_memory().unwrap();
    db.execute_batch(SCHEMA).unwrap();
    db.execute(SEED_PROJECT, []).unwrap();
    db
}
// Inserts an issue into project 1. The GitLab id is derived as `id * 10` and
// the created/updated/last-seen timestamps are fixed at 1000/2000/3000.
#[allow(clippy::too_many_arguments)]
fn insert_issue(
    conn: &Connection,
    id: i64,
    iid: i64,
    title: Option<&str>,
    description: Option<&str>,
    state: &str,
    author: Option<&str>,
    web_url: Option<&str>,
) {
    let sql = "INSERT INTO issues (id, gitlab_id, project_id, iid, title, description, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, 1000, 2000, 3000, ?8)";
    let gitlab_id = id * 10;
    conn.execute(
        sql,
        rusqlite::params![id, gitlab_id, iid, title, description, state, author, web_url],
    )
    .unwrap();
}
// Inserts a label with the given id and name into project 1.
fn insert_label(conn: &Connection, id: i64, name: &str) {
    let sql = "INSERT INTO labels (id, project_id, name) VALUES (?1, 1, ?2)";
    conn.execute(sql, rusqlite::params![id, name]).unwrap();
}
// Attaches an existing label to an existing issue.
fn link_issue_label(conn: &Connection, issue_id: i64, label_id: i64) {
    let sql = "INSERT INTO issue_labels (issue_id, label_id) VALUES (?1, ?2)";
    conn.execute(sql, rusqlite::params![issue_id, label_id])
        .unwrap();
}
// A fully populated issue renders every header line and the description.
#[test]
fn test_issue_document_format() {
    let db = setup_test_db();
    insert_issue(
        &db,
        1,
        234,
        Some("Authentication redesign"),
        Some("We need to modernize our authentication system..."),
        "opened",
        Some("johndoe"),
        Some("https://gitlab.example.com/group/project-one/-/issues/234"),
    );
    for (label_id, label_name) in [(1, "auth"), (2, "bug")] {
        insert_label(&db, label_id, label_name);
    }
    for label_id in [1, 2] {
        link_issue_label(&db, 1, label_id);
    }

    let document = extract_issue_document(&db, 1).unwrap().unwrap();
    assert_eq!(document.source_type, SourceType::Issue);
    assert_eq!(document.source_id, 1);
    assert_eq!(document.project_id, 1);
    assert_eq!(document.author_username, Some("johndoe".to_string()));

    let text = document.content_text.as_str();
    assert!(text.starts_with("[[Issue]] #234: Authentication redesign\n"));
    let expected_fragments = [
        "Project: group/project-one\n",
        "URL: https://gitlab.example.com/group/project-one/-/issues/234\n",
        "Labels: [\"auth\",\"bug\"]\n",
        "State: opened\n",
        "Author: @johndoe\n",
        "--- Description ---\n\nWe need to modernize our authentication system...",
    ];
    for fragment in expected_fragments {
        assert!(text.contains(fragment));
    }
    assert!(!document.is_truncated);
    assert!(document.paths.is_empty());
}
// Extracting a non-existent issue yields Ok(None).
#[test]
fn test_issue_not_found() {
    let db = setup_test_db();
    let outcome = extract_issue_document(&db, 999).unwrap();
    assert!(outcome.is_none());
}
/// An issue stored with a NULL description omits the description section
/// while still rendering the title line.
#[test]
fn test_issue_no_description() {
    let db = setup_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Quick fix"),
        None,
        "opened",
        Some("alice"),
        None,
    );

    let doc = extract_issue_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert!(!text.contains("--- Description ---"));
    assert!(text.contains("[[Issue]] #10: Quick fix\n"));
}
/// Labels inserted out of alphabetical order come back sorted, both in
/// `doc.labels` and in the rendered `Labels:` line.
#[test]
fn test_issue_labels_sorted() {
    let db = setup_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("Body"),
        "opened",
        Some("bob"),
        None,
    );
    // Deliberately unsorted insertion order.
    for (label_id, label_name) in [(1, "zeta"), (2, "alpha"), (3, "middle")] {
        insert_label(&db, label_id, label_name);
        link_issue_label(&db, 1, label_id);
    }

    let doc = extract_issue_document(&db, 1).unwrap().unwrap();

    assert_eq!(doc.labels, ["alpha", "middle", "zeta"]);
    let rendered_labels = "Labels: [\"alpha\",\"middle\",\"zeta\"]";
    assert!(doc.content_text.contains(rendered_labels));
}
/// An issue without any linked labels has an empty `labels` list and renders
/// an empty `Labels: []` line.
#[test]
fn test_issue_no_labels() {
    let db = setup_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("Body"),
        "opened",
        None,
        None,
    );

    let doc = extract_issue_document(&db, 1).unwrap().unwrap();

    assert!(doc.labels.is_empty());
    let text: &str = &doc.content_text;
    assert!(text.contains("Labels: []\n"));
}
/// Extracting the same issue twice must produce identical content, label and
/// path hashes, and the content hash must be 64 characters long.
#[test]
fn test_issue_hash_deterministic() {
    let conn = setup_test_db();
    insert_issue(
        &conn,
        1,
        10,
        Some("Test"),
        Some("Body"),
        "opened",
        Some("alice"),
        None,
    );

    let doc1 = extract_issue_document(&conn, 1).unwrap().unwrap();
    let doc2 = extract_issue_document(&conn, 1).unwrap().unwrap();

    assert_eq!(doc1.content_hash, doc2.content_hash);
    assert_eq!(doc1.labels_hash, doc2.labels_hash);
    // paths_hash is checked as well so every hash on the document is covered,
    // matching the determinism test for note documents.
    assert_eq!(doc1.paths_hash, doc2.paths_hash);
    assert_eq!(doc1.content_hash.len(), 64);
}
/// An empty-string description (as opposed to NULL) still produces the
/// description header followed by a blank line.
#[test]
fn test_issue_empty_description() {
    let db = setup_test_db();
    insert_issue(&db, 1, 10, Some("Test"), Some(""), "opened", None, None);

    let doc = extract_issue_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert!(text.contains("--- Description ---\n\n"));
}
/// Extends the connection from `setup_test_db()` with the `merge_requests`
/// and `mr_labels` tables and returns it. Panics if the schema batch fails.
fn setup_mr_test_db() -> Connection {
    let mr_schema = "
CREATE TABLE merge_requests (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
iid INTEGER NOT NULL,
title TEXT,
description TEXT,
state TEXT,
draft INTEGER NOT NULL DEFAULT 0,
author_username TEXT,
source_branch TEXT,
target_branch TEXT,
head_sha TEXT,
references_short TEXT,
references_full TEXT,
detailed_merge_status TEXT,
merge_user_username TEXT,
created_at INTEGER,
updated_at INTEGER,
merged_at INTEGER,
closed_at INTEGER,
last_seen_at INTEGER NOT NULL,
discussions_synced_for_updated_at INTEGER,
discussions_sync_last_attempt_at INTEGER,
discussions_sync_attempts INTEGER DEFAULT 0,
discussions_sync_last_error TEXT,
resource_events_synced_for_updated_at INTEGER,
web_url TEXT,
raw_payload_id INTEGER
);
CREATE TABLE mr_labels (
merge_request_id INTEGER REFERENCES merge_requests(id),
label_id INTEGER REFERENCES labels(id),
PRIMARY KEY(merge_request_id, label_id)
);
";
    let conn = setup_test_db();
    conn.execute_batch(mr_schema).unwrap();
    conn
}
/// Test helper: inserts a `merge_requests` row under project id 1.
///
/// `gitlab_id` is derived as `id * 10`; `created_at`, `updated_at` and
/// `last_seen_at` are fixed at 1000, 2000 and 3000. Panics if the insert fails.
// One positional argument per column keeps call sites explicit in tests.
#[allow(clippy::too_many_arguments)]
fn insert_mr(
    conn: &Connection,
    id: i64,
    iid: i64,
    title: Option<&str>,
    description: Option<&str>,
    state: Option<&str>,
    author: Option<&str>,
    source_branch: Option<&str>,
    target_branch: Option<&str>,
    web_url: Option<&str>,
) {
    let sql = "INSERT INTO merge_requests (id, gitlab_id, project_id, iid, title, description, state, author_username, source_branch, target_branch, created_at, updated_at, last_seen_at, web_url) VALUES (?1, ?2, 1, ?3, ?4, ?5, ?6, ?7, ?8, ?9, 1000, 2000, 3000, ?10)";
    let gitlab_id = id * 10;
    conn.execute(
        sql,
        rusqlite::params![
            id,
            gitlab_id,
            iid,
            title,
            description,
            state,
            author,
            source_branch,
            target_branch,
            web_url
        ],
    )
    .unwrap();
}
/// Test helper: attaches label `label_id` to merge request `mr_id` by
/// inserting a row into `mr_labels`. Panics if the insert fails.
fn link_mr_label(conn: &Connection, mr_id: i64, label_id: i64) {
    let sql = "INSERT INTO mr_labels (merge_request_id, label_id) VALUES (?1, ?2)";
    conn.execute(sql, rusqlite::params![mr_id, label_id])
        .unwrap();
}
/// A fully populated merge request renders the expected title line, header
/// lines (project, labels, state, author, branches) and description section.
#[test]
fn test_mr_document_format() {
    let db = setup_mr_test_db();
    insert_mr(
        &db,
        1,
        456,
        Some("Implement JWT authentication"),
        Some("This MR implements JWT-based authentication..."),
        Some("opened"),
        Some("johndoe"),
        Some("feature/jwt-auth"),
        Some("main"),
        Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
    );
    // Create each label and attach it to MR 1 right away.
    for (label_id, label_name) in [(1, "auth"), (2, "feature")] {
        insert_label(&db, label_id, label_name);
        link_mr_label(&db, 1, label_id);
    }

    let doc = extract_mr_document(&db, 1).unwrap().unwrap();

    assert_eq!(doc.source_type, SourceType::MergeRequest);
    assert_eq!(doc.source_id, 1);

    let text: &str = &doc.content_text;
    assert!(text.starts_with("[[MergeRequest]] !456: Implement JWT authentication\n"));
    let expected_fragments = [
        "Project: group/project-one\n",
        "Labels: [\"auth\",\"feature\"]\n",
        "State: opened\n",
        "Author: @johndoe\n",
        "Source: feature/jwt-auth -> main\n",
        "--- Description ---\n\nThis MR implements JWT-based authentication...",
    ];
    for fragment in expected_fragments {
        assert!(text.contains(fragment), "missing fragment: {fragment:?}");
    }
}
/// Extracting a merge request id that was never inserted yields `Ok(None)`.
#[test]
fn test_mr_not_found() {
    let db = setup_mr_test_db();
    let missing = extract_mr_document(&db, 999).unwrap();
    assert!(missing.is_none());
}
/// A merge request stored with a NULL description omits the description
/// section while still rendering the title line.
#[test]
fn test_mr_no_description() {
    let db = setup_mr_test_db();
    insert_mr(
        &db,
        1,
        10,
        Some("Quick fix"),
        None,
        Some("merged"),
        Some("alice"),
        Some("fix/bug"),
        Some("main"),
        None,
    );

    let doc = extract_mr_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert!(!text.contains("--- Description ---"));
    assert!(text.contains("[[MergeRequest]] !10: Quick fix\n"));
}
/// Source and target branches are rendered on a single
/// `Source: <source> -> <target>` line.
#[test]
fn test_mr_branch_info() {
    let db = setup_mr_test_db();
    insert_mr(
        &db,
        1,
        10,
        Some("Test"),
        Some("Body"),
        Some("opened"),
        None,
        Some("feature/foo"),
        Some("develop"),
        None,
    );

    let doc = extract_mr_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert!(text.contains("Source: feature/foo -> develop\n"));
}
/// When both branch columns are NULL, no `Source:` line is rendered.
#[test]
fn test_mr_no_branches() {
    let db = setup_mr_test_db();
    insert_mr(
        &db,
        1,
        10,
        Some("Test"),
        None,
        Some("opened"),
        None,
        None,
        None,
        None,
    );

    let doc = extract_mr_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert!(!text.contains("Source:"));
}
/// Extends the connection from `setup_mr_test_db()` with the `discussions`
/// and `notes` tables and returns it. Panics if the schema batch fails.
fn setup_discussion_test_db() -> Connection {
    let discussion_schema = "
CREATE TABLE discussions (
id INTEGER PRIMARY KEY,
gitlab_discussion_id TEXT NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
issue_id INTEGER REFERENCES issues(id),
merge_request_id INTEGER,
noteable_type TEXT NOT NULL,
individual_note INTEGER NOT NULL DEFAULT 0,
first_note_at INTEGER,
last_note_at INTEGER,
last_seen_at INTEGER NOT NULL,
resolvable INTEGER NOT NULL DEFAULT 0,
resolved INTEGER NOT NULL DEFAULT 0
);
CREATE TABLE notes (
id INTEGER PRIMARY KEY,
gitlab_id INTEGER UNIQUE NOT NULL,
discussion_id INTEGER NOT NULL REFERENCES discussions(id),
project_id INTEGER NOT NULL REFERENCES projects(id),
note_type TEXT,
is_system INTEGER NOT NULL DEFAULT 0,
author_username TEXT,
body TEXT,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
last_seen_at INTEGER NOT NULL,
position INTEGER,
resolvable INTEGER NOT NULL DEFAULT 0,
resolved INTEGER NOT NULL DEFAULT 0,
resolved_by TEXT,
resolved_at INTEGER,
position_old_path TEXT,
position_new_path TEXT,
position_old_line INTEGER,
position_new_line INTEGER,
raw_payload_id INTEGER
);
";
    let conn = setup_mr_test_db();
    conn.execute_batch(discussion_schema).unwrap();
    conn
}
/// Test helper: inserts a `discussions` row under project id 1.
///
/// `gitlab_discussion_id` is derived as `disc_<id>` and `last_seen_at` is
/// fixed at 3000. `issue_id` / `mr_id` set the parent link columns and may
/// both be `None`. Panics if the insert fails.
fn insert_discussion(
    conn: &Connection,
    id: i64,
    noteable_type: &str,
    issue_id: Option<i64>,
    mr_id: Option<i64>,
) {
    let sql = "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, merge_request_id, noteable_type, last_seen_at) VALUES (?1, ?2, 1, ?3, ?4, ?5, 3000)";
    let gitlab_discussion_id = format!("disc_{id}");
    conn.execute(
        sql,
        rusqlite::params![id, gitlab_discussion_id, issue_id, mr_id, noteable_type],
    )
    .unwrap();
}
/// Test helper: inserts a `notes` row under project id 1.
///
/// `created_at` is also written to `updated_at` and `last_seen_at`, and
/// `is_system` is stored as 0/1. Columns not listed in the statement keep
/// their schema defaults. Panics if the insert fails.
// One positional argument per column keeps call sites explicit in tests.
#[allow(clippy::too_many_arguments)]
fn insert_note(
    conn: &Connection,
    id: i64,
    gitlab_id: i64,
    discussion_id: i64,
    author: Option<&str>,
    body: Option<&str>,
    created_at: i64,
    is_system: bool,
    old_path: Option<&str>,
    new_path: Option<&str>,
) {
    let sql = "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9)";
    let system_flag = i32::from(is_system);
    conn.execute(
        sql,
        rusqlite::params![
            id,
            gitlab_id,
            discussion_id,
            author,
            body,
            created_at,
            system_flag,
            old_path,
            new_path
        ],
    )
    .unwrap();
}
/// A two-note discussion on a labelled issue renders the parent header, a
/// URL anchored at the first note, the parent's labels and both notes in the
/// thread section; the document author is the first note's author and the
/// document has no title.
#[test]
fn test_discussion_document_format() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        234,
        Some("Authentication redesign"),
        Some("desc"),
        "opened",
        Some("johndoe"),
        Some("https://gitlab.example.com/group/project-one/-/issues/234"),
    );
    for (label_id, label_name) in [(1, "auth"), (2, "bug")] {
        insert_label(&db, label_id, label_name);
        link_issue_label(&db, 1, label_id);
    }
    insert_discussion(&db, 1, "Issue", Some(1), None);

    // (note id, gitlab id, author, body); both notes share one timestamp.
    let thread = [
        (
            1,
            12345,
            "johndoe",
            "I think we should move to JWT-based auth...",
        ),
        (
            2,
            12346,
            "janedoe",
            "Agreed. What about refresh token strategy?",
        ),
    ];
    for (note_id, gitlab_id, author, body) in thread {
        insert_note(
            &db,
            note_id,
            gitlab_id,
            1,
            Some(author),
            Some(body),
            1710460800000,
            false,
            None,
            None,
        );
    }

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();

    assert_eq!(doc.source_type, SourceType::Discussion);

    let text: &str = &doc.content_text;
    assert!(text.starts_with("[[Discussion]] Issue #234: Authentication redesign\n"));
    let expected_fragments = [
        "Project: group/project-one\n",
        "URL: https://gitlab.example.com/group/project-one/-/issues/234#note_12345\n",
        "Labels: [\"auth\",\"bug\"]\n",
        "--- Thread ---",
        "@johndoe (2024-03-15):\nI think we should move to JWT-based auth...",
        "@janedoe (2024-03-15):\nAgreed. What about refresh token strategy?",
    ];
    for fragment in expected_fragments {
        assert!(text.contains(fragment), "missing fragment: {fragment:?}");
    }

    assert_eq!(doc.author_username.as_deref(), Some("johndoe"));
    assert!(doc.title.is_none());
}
/// Extracting a discussion id that was never inserted yields `Ok(None)`.
#[test]
fn test_discussion_not_found() {
    let db = setup_discussion_test_db();
    let missing = extract_discussion_document(&db, 999).unwrap();
    assert!(missing.is_none());
}
/// A discussion whose parent issue row has been removed yields `Ok(None)`.
#[test]
fn test_discussion_parent_deleted() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        99,
        10,
        Some("To be deleted"),
        None,
        "opened",
        None,
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(99), None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some("Hello"),
        1000,
        false,
        None,
        None,
    );

    // Delete the parent with foreign-key enforcement switched off, then
    // switch it back on before extracting.
    let orphaning_statements = [
        "PRAGMA foreign_keys = OFF",
        "DELETE FROM issues WHERE id = 99",
        "PRAGMA foreign_keys = ON",
    ];
    for statement in orphaning_statements {
        db.execute(statement, []).unwrap();
    }

    let outcome = extract_discussion_document(&db, 1).unwrap();
    assert!(outcome.is_none());
}
/// In a thread mixing user and system notes, the user notes appear in the
/// document and the system note's body does not.
#[test]
fn test_discussion_system_notes_excluded() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        Some("alice"),
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);

    // (note id, gitlab id, author, body, created_at, is_system)
    let notes = [
        (1, 100, "alice", "Real comment", 1000, false),
        (2, 101, "bot", "assigned to @alice", 2000, true),
        (3, 102, "bob", "Follow-up", 3000, false),
    ];
    for (note_id, gitlab_id, author, body, created_at, is_system) in notes {
        insert_note(
            &db,
            note_id,
            gitlab_id,
            1,
            Some(author),
            Some(body),
            created_at,
            is_system,
            None,
            None,
        );
    }

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert!(text.contains("@alice"));
    assert!(text.contains("@bob"));
    assert!(!text.contains("assigned to"));
}
/// Old and new paths repeated across notes are reported once each, in sorted
/// order, both in `doc.paths` and in the rendered `Files:` line.
#[test]
fn test_discussion_diffnote_paths() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);

    // Both notes point at the same old/new path pair.
    let notes = [
        (1, 100, "alice", "Comment on code", 1000),
        (2, 101, "bob", "Reply", 2000),
    ];
    for (note_id, gitlab_id, author, body, created_at) in notes {
        insert_note(
            &db,
            note_id,
            gitlab_id,
            1,
            Some(author),
            Some(body),
            created_at,
            false,
            Some("src/old.rs"),
            Some("src/new.rs"),
        );
    }

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();

    assert_eq!(doc.paths, ["src/new.rs", "src/old.rs"]);
    let rendered_files = "Files: [\"src/new.rs\",\"src/old.rs\"]";
    assert!(doc.content_text.contains(rendered_files));
}
/// The discussion URL is the parent issue's `web_url` with a
/// `#note_<gitlab_id>` anchor for the note.
#[test]
fn test_discussion_url_construction() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        Some("https://gitlab.example.com/group/project-one/-/issues/10"),
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        54321,
        1,
        Some("alice"),
        Some("Hello"),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();

    let expected_url = "https://gitlab.example.com/group/project-one/-/issues/10#note_54321";
    assert_eq!(doc.url.as_deref(), Some(expected_url));
}
/// A discussion document carries the labels of its parent issue, sorted.
#[test]
fn test_discussion_uses_parent_labels() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    for (label_id, label_name) in [(1, "backend"), (2, "api")] {
        insert_label(&db, label_id, label_name);
        link_issue_label(&db, 1, label_id);
    }
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some("Comment"),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();

    assert_eq!(doc.labels, ["api", "backend"]);
}
/// A discussion attached to a merge request renders an `MR !<iid>` header
/// with the merge request's title.
#[test]
fn test_discussion_on_mr() {
    let db = setup_discussion_test_db();
    insert_mr(
        &db,
        1,
        456,
        Some("JWT Auth"),
        Some("desc"),
        Some("opened"),
        Some("johndoe"),
        Some("feature/jwt"),
        Some("main"),
        Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
    );
    insert_discussion(&db, 1, "MergeRequest", None, Some(1));
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some("LGTM"),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_discussion_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert!(text.contains("[[Discussion]] MR !456: JWT Auth\n"));
}
/// A discussion whose only note is a system note yields `Ok(None)`.
#[test]
fn test_discussion_all_system_notes() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("bot"),
        Some("assigned to @alice"),
        1000,
        true,
        None,
        None,
    );

    let outcome = extract_discussion_document(&db, 1).unwrap();
    assert!(outcome.is_none());
}
/// Test helper: inserts a `notes` row under project id 1, additionally
/// setting the position line numbers, `note_type` and resolution flags.
///
/// `created_at` is also written to `updated_at` and `last_seen_at`; the
/// boolean flags (`is_system`, `resolvable`, `resolved`) are stored as 0/1.
/// Panics if the insert fails.
// One positional argument per column keeps call sites explicit in tests.
#[allow(clippy::too_many_arguments)]
fn insert_note_with_type(
    conn: &Connection,
    id: i64,
    gitlab_id: i64,
    discussion_id: i64,
    author: Option<&str>,
    body: Option<&str>,
    created_at: i64,
    is_system: bool,
    old_path: Option<&str>,
    new_path: Option<&str>,
    old_line: Option<i64>,
    new_line: Option<i64>,
    note_type: Option<&str>,
    resolvable: bool,
    resolved: bool,
) {
    let sql = "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path, position_old_line, position_new_line, note_type, resolvable, resolved) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14)";
    let system_flag = i32::from(is_system);
    let resolvable_flag = i32::from(resolvable);
    let resolved_flag = i32::from(resolved);
    conn.execute(
        sql,
        rusqlite::params![
            id,
            gitlab_id,
            discussion_id,
            author,
            body,
            created_at,
            system_flag,
            old_path,
            new_path,
            old_line,
            new_line,
            note_type,
            resolvable_flag,
            resolved_flag
        ],
    )
    .unwrap();
}
/// A plain note on an issue produces a note document with the expected
/// metadata lines, body section, title and anchored URL.
#[test]
fn test_note_document_basic_format() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        42,
        Some("Fix login bug"),
        Some("desc"),
        "opened",
        Some("johndoe"),
        Some("https://gitlab.example.com/group/project-one/-/issues/42"),
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        12345,
        1,
        Some("alice"),
        Some("This looks like a race condition"),
        1710460800000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&db, 1).unwrap().unwrap();

    assert_eq!(doc.source_type, SourceType::Note);
    assert_eq!(doc.source_id, 1);
    assert_eq!(doc.project_id, 1);
    assert_eq!(doc.author_username.as_deref(), Some("alice"));

    let text: &str = &doc.content_text;
    let expected_fragments = [
        "[[Note]]",
        "source_type: note",
        "note_gitlab_id: 12345",
        "project: group/project-one",
        "parent_type: Issue",
        "parent_iid: 42",
        "parent_title: Fix login bug",
        "author: @alice",
        "--- Body ---",
        "This looks like a race condition",
    ];
    for fragment in expected_fragments {
        assert!(text.contains(fragment), "missing fragment: {fragment:?}");
    }

    assert_eq!(
        doc.title.as_deref(),
        Some("Note by @alice on Issue #42: Fix login bug")
    );
    assert_eq!(
        doc.url.as_deref(),
        Some("https://gitlab.example.com/group/project-one/-/issues/42#note_12345")
    );
}
/// A `DiffNote` with position data renders its note type, a
/// `path: <new_path>:<new_line>` line and its resolved flag, and reports both
/// old and new paths in sorted order.
#[test]
fn test_note_document_diffnote_with_path() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Refactor auth"),
        Some("desc"),
        "opened",
        None,
        Some("https://gitlab.example.com/group/project-one/-/issues/10"),
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note_with_type(
        &db,
        1,
        555,
        1,
        Some("bob"),
        Some("Unused variable here"),
        1000,
        false,
        Some("src/old_auth.rs"),
        Some("src/auth.rs"),
        Some(10),
        Some(25),
        Some("DiffNote"),
        true,
        false,
    );

    let doc = extract_note_document(&db, 1).unwrap().unwrap();

    let text: &str = &doc.content_text;
    let expected_fragments = [
        "note_type: DiffNote",
        "path: src/auth.rs:25",
        "resolved: false",
    ];
    for fragment in expected_fragments {
        assert!(text.contains(fragment), "missing fragment: {fragment:?}");
    }
    assert_eq!(doc.paths, ["src/auth.rs", "src/old_auth.rs"]);
}
/// A note document carries its parent issue's labels, sorted, and renders
/// them as a comma-separated `labels:` line.
#[test]
fn test_note_document_inherits_parent_labels() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    for (label_id, label_name) in [(1, "backend"), (2, "api")] {
        insert_label(&db, label_id, label_name);
        link_issue_label(&db, 1, label_id);
    }
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some("Note body"),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&db, 1).unwrap().unwrap();

    assert_eq!(doc.labels, ["api", "backend"]);
    let text: &str = &doc.content_text;
    assert!(text.contains("labels: api, backend"));
}
/// A note whose discussion belongs to a merge request reports
/// `parent_type: MergeRequest`, the MR iid, and an `MR !<iid>` title.
#[test]
fn test_note_document_mr_parent() {
    let db = setup_discussion_test_db();
    insert_mr(
        &db,
        1,
        456,
        Some("JWT Auth"),
        Some("desc"),
        Some("opened"),
        Some("johndoe"),
        Some("feature/jwt"),
        Some("main"),
        Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
    );
    insert_discussion(&db, 1, "MergeRequest", None, Some(1));
    insert_note(
        &db,
        1,
        200,
        1,
        Some("reviewer"),
        Some("Needs tests"),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&db, 1).unwrap().unwrap();

    let text: &str = &doc.content_text;
    assert!(text.contains("parent_type: MergeRequest"));
    assert!(text.contains("parent_iid: 456"));
    assert_eq!(
        doc.title.as_deref(),
        Some("Note by @reviewer on MR !456: JWT Auth")
    );
}
/// Extracting a note flagged as a system note yields `Ok(None)`.
#[test]
fn test_note_document_system_note_returns_none() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("bot"),
        Some("assigned to @alice"),
        1000,
        true,
        None,
        None,
    );

    let outcome = extract_note_document(&db, 1).unwrap();
    assert!(outcome.is_none());
}
/// Extracting a note id that was never inserted yields `Ok(None)`.
#[test]
fn test_note_document_not_found() {
    let db = setup_discussion_test_db();
    let missing = extract_note_document(&db, 999).unwrap();
    assert!(missing.is_none());
}
/// A note whose discussion has neither an issue nor a merge request parent
/// yields `Ok(None)`.
#[test]
fn test_note_document_orphaned_discussion() {
    let db = setup_discussion_test_db();
    // Discussion with both parent ids left as NULL.
    insert_discussion(&db, 1, "Issue", None, None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some("Comment"),
        1000,
        false,
        None,
        None,
    );

    let outcome = extract_note_document(&db, 1).unwrap();
    assert!(outcome.is_none());
}
/// Extracting the same note twice produces identical content, label and path
/// hashes, and the content hash is 64 characters long.
#[test]
fn test_note_document_hash_deterministic() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some("Comment"),
        1000,
        false,
        None,
        None,
    );

    let first = extract_note_document(&db, 1).unwrap().unwrap();
    let second = extract_note_document(&db, 1).unwrap().unwrap();

    assert_eq!(first.content_hash, second.content_hash);
    assert_eq!(first.labels_hash, second.labels_hash);
    assert_eq!(first.paths_hash, second.paths_hash);
    assert_eq!(first.content_hash.len(), 64);
}
/// A note with an empty-string body still renders the body header followed
/// by a blank line, and is not marked truncated.
#[test]
fn test_note_document_empty_body() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        Some(""),
        1000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;

    assert!(text.contains("--- Body ---\n\n"));
    assert!(!doc.is_truncated);
}
/// A note with a NULL body renders the body header followed by a blank line,
/// and nothing comes after it in the document.
#[test]
fn test_note_document_null_body() {
    let db = setup_discussion_test_db();
    insert_issue(
        &db,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&db, 1, "Issue", Some(1), None);
    insert_note(
        &db,
        1,
        100,
        1,
        Some("alice"),
        None,
        1000,
        false,
        None,
        None,
    );

    let doc = extract_note_document(&db, 1).unwrap().unwrap();
    let text: &str = &doc.content_text;
    let body_header = "--- Body ---\n\n";

    assert!(text.contains(body_header));
    assert!(text.ends_with(body_header));
}
}