Move inline #[cfg(test)] mod tests { ... } blocks from 22 source files
into dedicated _tests.rs companion files, wired via:
#[cfg(test)]
#[path = "module_tests.rs"]
mod tests;
This keeps implementation-focused source files leaner and more scannable
while preserving full access to private items through `use super::*;`.
Modules extracted:
core: db, note_parser, payloads, project, references, sync_run,
timeline_collect, timeline_expand, timeline_seed
cli: list (55 tests), who (75 tests)
documents: extractor (43 tests), regenerator
embedding: change_detector, chunking
gitlab: graphql (wiremock async tests), transformers/issue
ingestion: dirty_tracker, discussions, issues, mr_diffs
Also adds conflicts_with("explain_score") to the --detail flag in the
who command to prevent mutually exclusive flags from being combined.
All 629 unit tests pass. No behavior changes.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1040 lines
34 KiB
Rust
1040 lines
34 KiB
Rust
use chrono::DateTime;
|
|
use rusqlite::Connection;
|
|
use serde::{Deserialize, Serialize};
|
|
use sha2::{Digest, Sha256};
|
|
use std::collections::{BTreeSet, HashMap};
|
|
use std::fmt::Write as _;
|
|
|
|
use super::truncation::{
|
|
MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
|
|
};
|
|
use crate::core::error::Result;
|
|
use crate::core::time::ms_to_iso;
|
|
|
|
/// The kind of GitLab record a document was extracted from.
///
/// Serialized/stored in snake_case (e.g. `merge_request`) via serde.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SourceType {
    Issue,
    MergeRequest,
    Discussion,
    Note,
}
|
|
|
|
impl SourceType {
|
|
pub fn as_str(&self) -> &'static str {
|
|
match self {
|
|
Self::Issue => "issue",
|
|
Self::MergeRequest => "merge_request",
|
|
Self::Discussion => "discussion",
|
|
Self::Note => "note",
|
|
}
|
|
}
|
|
|
|
pub fn parse(s: &str) -> Option<Self> {
|
|
match s.to_lowercase().as_str() {
|
|
"issue" | "issues" => Some(Self::Issue),
|
|
"mr" | "mrs" | "merge_request" | "merge_requests" => Some(Self::MergeRequest),
|
|
"discussion" | "discussions" => Some(Self::Discussion),
|
|
"note" | "notes" => Some(Self::Note),
|
|
_ => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for SourceType {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "{}", self.as_str())
|
|
}
|
|
}
|
|
|
|
/// One fully rendered, hash-stamped document ready for downstream
/// embedding/indexing.
#[derive(Debug, Clone)]
pub struct DocumentData {
    /// Which kind of record this document was built from.
    pub source_type: SourceType,
    /// Internal database id of the source row (not the GitLab id).
    pub source_id: i64,
    pub project_id: i64,
    /// Author of the issue/MR, or of the first note for discussion documents.
    pub author_username: Option<String>,
    /// Parent labels, sorted by name.
    pub labels: Vec<String>,
    /// Distinct file paths referenced by diff notes (empty for issues/MRs).
    pub paths: Vec<String>,
    /// Order-insensitive SHA-256 over `labels`, used for change detection.
    pub labels_hash: String,
    /// Order-insensitive SHA-256 over `paths`, used for change detection.
    pub paths_hash: String,
    /// Millisecond timestamp.
    pub created_at: i64,
    /// Millisecond timestamp.
    pub updated_at: i64,
    pub url: Option<String>,
    /// Human-readable title; `None` for discussion documents.
    pub title: Option<String>,
    /// Rendered (possibly truncated) document text.
    pub content_text: String,
    /// SHA-256 hex digest of `content_text`.
    pub content_hash: String,
    /// True when the content was cut down to fit a size cap.
    pub is_truncated: bool,
    pub truncated_reason: Option<String>,
}
|
|
|
|
pub fn compute_content_hash(content: &str) -> String {
|
|
let mut hasher = Sha256::new();
|
|
hasher.update(content.as_bytes());
|
|
format!("{:x}", hasher.finalize())
|
|
}
|
|
|
|
pub fn compute_list_hash(items: &[String]) -> String {
|
|
let mut indices: Vec<usize> = (0..items.len()).collect();
|
|
indices.sort_by(|a, b| items[*a].cmp(&items[*b]));
|
|
let mut hasher = Sha256::new();
|
|
for (i, &idx) in indices.iter().enumerate() {
|
|
if i > 0 {
|
|
hasher.update(b"\n");
|
|
}
|
|
hasher.update(items[idx].as_bytes());
|
|
}
|
|
format!("{:x}", hasher.finalize())
|
|
}
|
|
|
|
/// Builds the embedding document for a single issue, or `Ok(None)` when the
/// issue id is unknown.
///
/// The document is a text header (kind marker, project, URL, labels, state,
/// author) followed by the raw description, hard-capped by
/// `truncate_hard_cap` before the content hash is computed.
///
/// # Errors
/// Propagates any database error other than "no rows".
pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option<DocumentData>> {
    // Single-row join: issue fields plus owning-project metadata.
    let row = conn.query_row(
        "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,
                i.created_at, i.updated_at, i.web_url,
                p.path_with_namespace, p.id AS project_id
         FROM issues i
         JOIN projects p ON p.id = i.project_id
         WHERE i.id = ?1",
        rusqlite::params![issue_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, Option<String>>(2)?,
                row.get::<_, Option<String>>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, Option<String>>(5)?,
                row.get::<_, i64>(6)?,
                row.get::<_, i64>(7)?,
                row.get::<_, Option<String>>(8)?,
                row.get::<_, String>(9)?,
                row.get::<_, i64>(10)?,
            ))
        },
    );

    let (
        id,
        iid,
        title,
        description,
        state,
        author_username,
        created_at,
        updated_at,
        web_url,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        // An unknown id is not an error: the caller treats it as "nothing to embed".
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };

    // Labels ordered by name so the rendered text is deterministic.
    let mut label_stmt = conn.prepare_cached(
        "SELECT l.name FROM issue_labels il
         JOIN labels l ON l.id = il.label_id
         WHERE il.issue_id = ?1
         ORDER BY l.name",
    )?;
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    // Serialization failure is unexpected for Vec<String>; fall back to "[]".
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());

    let display_title = title.as_deref().unwrap_or("(untitled)");
    let mut content = format!(
        "[[Issue]] #{}: {}\nProject: {}\n",
        iid, display_title, path_with_namespace
    );
    if let Some(ref url) = web_url {
        let _ = writeln!(content, "URL: {}", url);
    }
    let _ = writeln!(content, "Labels: {}", labels_json);
    let _ = writeln!(content, "State: {}", state);
    if let Some(ref author) = author_username {
        let _ = writeln!(content, "Author: @{}", author);
    }

    if let Some(ref desc) = description {
        content.push_str("\n--- Description ---\n\n");
        content.push_str(desc);
    }

    // Issues carry no diff-note file paths; hashing the empty list keeps the
    // schema uniform with discussion/note documents.
    let labels_hash = compute_list_hash(&labels);
    let paths_hash = compute_list_hash(&[]);

    // Enforce the global size cap, then hash the (possibly truncated) text.
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);

    Ok(Some(DocumentData {
        source_type: SourceType::Issue,
        source_id: id,
        project_id,
        author_username,
        labels,
        paths: Vec::new(),
        labels_hash,
        paths_hash,
        created_at,
        updated_at,
        url: web_url,
        title: Some(display_title.to_string()),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()),
    }))
}
|
|
|
|
/// Builds the embedding document for a single merge request, or `Ok(None)`
/// when the MR id is unknown.
///
/// Mirrors `extract_issue_document`, with two MR-specific additions: a
/// `Source: <src> -> <tgt>` branch line (only when both branches are known)
/// and nullable state/timestamps that fall back to "unknown"/0.
///
/// # Errors
/// Propagates any database error other than "no rows".
pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<DocumentData>> {
    // Single-row join: MR fields plus owning-project metadata.
    let row = conn.query_row(
        "SELECT m.id, m.iid, m.title, m.description, m.state, m.author_username,
                m.source_branch, m.target_branch,
                m.created_at, m.updated_at, m.web_url,
                p.path_with_namespace, p.id AS project_id
         FROM merge_requests m
         JOIN projects p ON p.id = m.project_id
         WHERE m.id = ?1",
        rusqlite::params![mr_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, Option<String>>(2)?,
                row.get::<_, Option<String>>(3)?,
                row.get::<_, Option<String>>(4)?,
                row.get::<_, Option<String>>(5)?,
                row.get::<_, Option<String>>(6)?,
                row.get::<_, Option<String>>(7)?,
                row.get::<_, Option<i64>>(8)?,
                row.get::<_, Option<i64>>(9)?,
                row.get::<_, Option<String>>(10)?,
                row.get::<_, String>(11)?,
                row.get::<_, i64>(12)?,
            ))
        },
    );

    let (
        id,
        iid,
        title,
        description,
        state,
        author_username,
        source_branch,
        target_branch,
        created_at,
        updated_at,
        web_url,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        // Unknown id means "nothing to embed", not an error.
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };

    // Labels ordered by name so the rendered text is deterministic.
    let mut label_stmt = conn.prepare_cached(
        "SELECT l.name FROM mr_labels ml
         JOIN labels l ON l.id = ml.label_id
         WHERE ml.merge_request_id = ?1
         ORDER BY l.name",
    )?;
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());

    let display_title = title.as_deref().unwrap_or("(untitled)");
    let display_state = state.as_deref().unwrap_or("unknown");
    let mut content = format!(
        "[[MergeRequest]] !{}: {}\nProject: {}\n",
        iid, display_title, path_with_namespace
    );
    if let Some(ref url) = web_url {
        let _ = writeln!(content, "URL: {}", url);
    }
    let _ = writeln!(content, "Labels: {}", labels_json);
    let _ = writeln!(content, "State: {}", display_state);
    if let Some(ref author) = author_username {
        let _ = writeln!(content, "Author: @{}", author);
    }
    // Branch line is only emitted when both endpoints are known.
    if let (Some(src), Some(tgt)) = (&source_branch, &target_branch) {
        let _ = writeln!(content, "Source: {} -> {}", src, tgt);
    }

    if let Some(ref desc) = description {
        content.push_str("\n--- Description ---\n\n");
        content.push_str(desc);
    }

    // MRs carry no diff-note file paths; empty-list hash keeps the schema uniform.
    let labels_hash = compute_list_hash(&labels);
    let paths_hash = compute_list_hash(&[]);

    // Enforce the global size cap, then hash the (possibly truncated) text.
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);

    Ok(Some(DocumentData {
        source_type: SourceType::MergeRequest,
        source_id: id,
        project_id,
        author_username,
        labels,
        paths: Vec::new(),
        labels_hash,
        paths_hash,
        // Timestamps are nullable in the MR schema; default to 0 (epoch).
        created_at: created_at.unwrap_or(0),
        updated_at: updated_at.unwrap_or(0),
        url: web_url,
        title: Some(display_title.to_string()),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()),
    }))
}
|
|
|
|
fn format_date(ms: i64) -> String {
|
|
DateTime::from_timestamp_millis(ms)
|
|
.map(|dt| dt.format("%Y-%m-%d").to_string())
|
|
.unwrap_or_else(|| "unknown".to_string())
|
|
}
|
|
|
|
pub fn extract_discussion_document(
|
|
conn: &Connection,
|
|
discussion_id: i64,
|
|
) -> Result<Option<DocumentData>> {
|
|
let disc_row = conn.query_row(
|
|
"SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id,
|
|
p.path_with_namespace, p.id AS project_id
|
|
FROM discussions d
|
|
JOIN projects p ON p.id = d.project_id
|
|
WHERE d.id = ?1",
|
|
rusqlite::params![discussion_id],
|
|
|row| {
|
|
Ok((
|
|
row.get::<_, i64>(0)?,
|
|
row.get::<_, String>(1)?,
|
|
row.get::<_, Option<i64>>(2)?,
|
|
row.get::<_, Option<i64>>(3)?,
|
|
row.get::<_, String>(4)?,
|
|
row.get::<_, i64>(5)?,
|
|
))
|
|
},
|
|
);
|
|
|
|
let (id, noteable_type, issue_id, merge_request_id, path_with_namespace, project_id) =
|
|
match disc_row {
|
|
Ok(r) => r,
|
|
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
|
|
Err(e) => return Err(e.into()),
|
|
};
|
|
|
|
let (_parent_iid, parent_title, parent_web_url, parent_type_prefix, labels) =
|
|
match noteable_type.as_str() {
|
|
"Issue" => {
|
|
let parent_id = match issue_id {
|
|
Some(pid) => pid,
|
|
None => return Ok(None),
|
|
};
|
|
let parent = conn.query_row(
|
|
"SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
|
|
rusqlite::params![parent_id],
|
|
|row| {
|
|
Ok((
|
|
row.get::<_, i64>(0)?,
|
|
row.get::<_, Option<String>>(1)?,
|
|
row.get::<_, Option<String>>(2)?,
|
|
))
|
|
},
|
|
);
|
|
let (iid, title, web_url) = match parent {
|
|
Ok(r) => r,
|
|
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
|
|
Err(e) => return Err(e.into()),
|
|
};
|
|
let mut label_stmt = conn.prepare_cached(
|
|
"SELECT l.name FROM issue_labels il
|
|
JOIN labels l ON l.id = il.label_id
|
|
WHERE il.issue_id = ?1
|
|
ORDER BY l.name",
|
|
)?;
|
|
let labels: Vec<String> = label_stmt
|
|
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
|
|
.collect::<std::result::Result<Vec<_>, _>>()?;
|
|
|
|
(iid, title, web_url, format!("Issue #{}", iid), labels)
|
|
}
|
|
"MergeRequest" => {
|
|
let parent_id = match merge_request_id {
|
|
Some(pid) => pid,
|
|
None => return Ok(None),
|
|
};
|
|
let parent = conn.query_row(
|
|
"SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
|
|
rusqlite::params![parent_id],
|
|
|row| {
|
|
Ok((
|
|
row.get::<_, i64>(0)?,
|
|
row.get::<_, Option<String>>(1)?,
|
|
row.get::<_, Option<String>>(2)?,
|
|
))
|
|
},
|
|
);
|
|
let (iid, title, web_url) = match parent {
|
|
Ok(r) => r,
|
|
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
|
|
Err(e) => return Err(e.into()),
|
|
};
|
|
let mut label_stmt = conn.prepare_cached(
|
|
"SELECT l.name FROM mr_labels ml
|
|
JOIN labels l ON l.id = ml.label_id
|
|
WHERE ml.merge_request_id = ?1
|
|
ORDER BY l.name",
|
|
)?;
|
|
let labels: Vec<String> = label_stmt
|
|
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
|
|
.collect::<std::result::Result<Vec<_>, _>>()?;
|
|
|
|
(iid, title, web_url, format!("MR !{}", iid), labels)
|
|
}
|
|
_ => return Ok(None),
|
|
};
|
|
|
|
let mut note_stmt = conn.prepare_cached(
|
|
"SELECT n.author_username, n.body, n.created_at, n.gitlab_id,
|
|
n.note_type, n.position_old_path, n.position_new_path
|
|
FROM notes n
|
|
WHERE n.discussion_id = ?1 AND n.is_system = 0
|
|
ORDER BY n.created_at ASC, n.id ASC",
|
|
)?;
|
|
|
|
struct NoteRow {
|
|
author: Option<String>,
|
|
body: Option<String>,
|
|
created_at: i64,
|
|
gitlab_id: i64,
|
|
old_path: Option<String>,
|
|
new_path: Option<String>,
|
|
}
|
|
|
|
let notes: Vec<NoteRow> = note_stmt
|
|
.query_map(rusqlite::params![id], |row| {
|
|
Ok(NoteRow {
|
|
author: row.get(0)?,
|
|
body: row.get(1)?,
|
|
created_at: row.get(2)?,
|
|
gitlab_id: row.get(3)?,
|
|
old_path: row.get(5)?,
|
|
new_path: row.get(6)?,
|
|
})
|
|
})?
|
|
.collect::<std::result::Result<Vec<_>, _>>()?;
|
|
|
|
if notes.is_empty() {
|
|
return Ok(None);
|
|
}
|
|
|
|
let mut path_set = BTreeSet::new();
|
|
for note in ¬es {
|
|
if let Some(ref p) = note.old_path
|
|
&& !p.is_empty()
|
|
{
|
|
path_set.insert(p.clone());
|
|
}
|
|
if let Some(ref p) = note.new_path
|
|
&& !p.is_empty()
|
|
{
|
|
path_set.insert(p.clone());
|
|
}
|
|
}
|
|
let paths: Vec<String> = path_set.into_iter().collect();
|
|
|
|
let first_note_gitlab_id = notes[0].gitlab_id;
|
|
let url = parent_web_url
|
|
.as_ref()
|
|
.map(|wu| format!("{}#note_{}", wu, first_note_gitlab_id));
|
|
|
|
let author_username = notes[0].author.clone();
|
|
|
|
let display_title = parent_title.as_deref().unwrap_or("(untitled)");
|
|
let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
|
|
let paths_json = serde_json::to_string(&paths).unwrap_or_else(|_| "[]".to_string());
|
|
|
|
let mut content = format!(
|
|
"[[Discussion]] {}: {}\nProject: {}\n",
|
|
parent_type_prefix, display_title, path_with_namespace
|
|
);
|
|
if let Some(ref u) = url {
|
|
let _ = writeln!(content, "URL: {}", u);
|
|
}
|
|
let _ = writeln!(content, "Labels: {}", labels_json);
|
|
if !paths.is_empty() {
|
|
let _ = writeln!(content, "Files: {}", paths_json);
|
|
}
|
|
|
|
let note_contents: Vec<NoteContent> = notes
|
|
.iter()
|
|
.map(|note| NoteContent {
|
|
author: note.author.as_deref().unwrap_or("unknown").to_string(),
|
|
date: format_date(note.created_at),
|
|
body: note.body.as_deref().unwrap_or("").to_string(),
|
|
})
|
|
.collect();
|
|
|
|
let header_len = content.len() + "\n--- Thread ---\n\n".len();
|
|
let thread_budget = MAX_DISCUSSION_BYTES.saturating_sub(header_len);
|
|
|
|
let thread_result = truncate_discussion(¬e_contents, thread_budget);
|
|
content.push_str("\n--- Thread ---\n\n");
|
|
content.push_str(&thread_result.content);
|
|
|
|
let created_at = notes[0].created_at;
|
|
let updated_at = notes.last().map(|n| n.created_at).unwrap_or(created_at);
|
|
|
|
let content_hash = compute_content_hash(&content);
|
|
let labels_hash = compute_list_hash(&labels);
|
|
let paths_hash = compute_list_hash(&paths);
|
|
|
|
Ok(Some(DocumentData {
|
|
source_type: SourceType::Discussion,
|
|
source_id: id,
|
|
project_id,
|
|
author_username,
|
|
labels,
|
|
paths,
|
|
labels_hash,
|
|
paths_hash,
|
|
created_at,
|
|
updated_at,
|
|
url,
|
|
title: None,
|
|
content_text: content,
|
|
content_hash,
|
|
is_truncated: thread_result.is_truncated,
|
|
truncated_reason: thread_result.reason.map(|r| r.as_str().to_string()),
|
|
}))
|
|
}
|
|
|
|
/// Builds the embedding document for a single note, or `Ok(None)` when the
/// note is unknown, is a system note, or its parent (issue/MR) cannot be
/// resolved.
///
/// Looks up the note plus its discussion and project, resolves the parent
/// issue/MR metadata inline (see `extract_note_document_cached` for the
/// batch-friendly variant), then delegates rendering to
/// `build_note_document`.
///
/// # Errors
/// Propagates any database error other than "no rows".
pub fn extract_note_document(conn: &Connection, note_id: i64) -> Result<Option<DocumentData>> {
    let row = conn.query_row(
        "SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
                n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
                n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
                d.noteable_type, d.issue_id, d.merge_request_id,
                p.path_with_namespace, p.id AS project_id
         FROM notes n
         JOIN discussions d ON n.discussion_id = d.id
         JOIN projects p ON n.project_id = p.id
         WHERE n.id = ?1",
        rusqlite::params![note_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, Option<String>>(2)?,
                row.get::<_, Option<String>>(3)?,
                row.get::<_, Option<String>>(4)?,
                row.get::<_, bool>(5)?,
                row.get::<_, i64>(6)?,
                row.get::<_, i64>(7)?,
                row.get::<_, Option<String>>(8)?,
                row.get::<_, Option<i64>>(9)?,
                row.get::<_, Option<String>>(10)?,
                row.get::<_, Option<i64>>(11)?,
                row.get::<_, bool>(12)?,
                row.get::<_, bool>(13)?,
                row.get::<_, Option<String>>(14)?,
                row.get::<_, String>(15)?,
                row.get::<_, Option<i64>>(16)?,
                row.get::<_, Option<i64>>(17)?,
                row.get::<_, String>(18)?,
                row.get::<_, i64>(19)?,
            ))
        },
    );

    let (
        _id,
        gitlab_id,
        author_username,
        body,
        note_type,
        is_system,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        _position_old_line,
        resolvable,
        resolved,
        _resolved_by,
        noteable_type,
        issue_id,
        merge_request_id,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };

    // System notes ("changed milestone", etc.) are noise; never embed them.
    if is_system {
        return Ok(None);
    }

    // Resolve parent (issue or MR) metadata: iid/title/url plus labels.
    let (parent_iid, parent_title, parent_web_url, parent_type_label, labels) =
        match noteable_type.as_str() {
            "Issue" => {
                let parent_id = match issue_id {
                    Some(pid) => pid,
                    None => return Ok(None),
                };
                let parent = conn.query_row(
                    "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
                    rusqlite::params![parent_id],
                    |row| {
                        Ok((
                            row.get::<_, i64>(0)?,
                            row.get::<_, Option<String>>(1)?,
                            row.get::<_, Option<String>>(2)?,
                        ))
                    },
                );
                let (iid, title, web_url) = match parent {
                    Ok(r) => r,
                    Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
                    Err(e) => return Err(e.into()),
                };
                let mut label_stmt = conn.prepare_cached(
                    "SELECT l.name FROM issue_labels il
                     JOIN labels l ON l.id = il.label_id
                     WHERE il.issue_id = ?1
                     ORDER BY l.name",
                )?;
                let labels: Vec<String> = label_stmt
                    .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                    .collect::<std::result::Result<Vec<_>, _>>()?;

                (iid, title, web_url, "Issue", labels)
            }
            "MergeRequest" => {
                let parent_id = match merge_request_id {
                    Some(pid) => pid,
                    None => return Ok(None),
                };
                let parent = conn.query_row(
                    "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
                    rusqlite::params![parent_id],
                    |row| {
                        Ok((
                            row.get::<_, i64>(0)?,
                            row.get::<_, Option<String>>(1)?,
                            row.get::<_, Option<String>>(2)?,
                        ))
                    },
                );
                let (iid, title, web_url) = match parent {
                    Ok(r) => r,
                    Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
                    Err(e) => return Err(e.into()),
                };
                let mut label_stmt = conn.prepare_cached(
                    "SELECT l.name FROM mr_labels ml
                     JOIN labels l ON l.id = ml.label_id
                     WHERE ml.merge_request_id = ?1
                     ORDER BY l.name",
                )?;
                let labels: Vec<String> = label_stmt
                    .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                    .collect::<std::result::Result<Vec<_>, _>>()?;

                (iid, title, web_url, "MergeRequest", labels)
            }
            _ => return Ok(None),
        };

    // All rendering and hashing is shared with the cached variant.
    build_note_document(
        note_id,
        gitlab_id,
        author_username,
        body,
        note_type,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        resolvable,
        resolved,
        parent_iid,
        parent_title.as_deref(),
        parent_web_url.as_deref(),
        &labels,
        parent_type_label,
        &path_with_namespace,
        project_id,
    )
}
|
|
|
|
/// Metadata of a note's parent (issue or merge request), as needed to
/// render a note document header.
pub struct ParentMetadata {
    /// Project-scoped iid of the parent issue/MR.
    pub iid: i64,
    pub title: Option<String>,
    pub web_url: Option<String>,
    /// Parent labels, sorted by name.
    pub labels: Vec<String>,
    /// `path_with_namespace` of the owning project.
    pub project_path: String,
}
|
|
|
|
/// Memoizes parent-metadata lookups across many note extractions so each
/// `(noteable_type, parent_id)` pair is queried at most once.
pub struct ParentMetadataCache {
    // `None` entries record parents that were looked up and not found,
    // so misses are also cached.
    cache: HashMap<(String, i64), Option<ParentMetadata>>,
}
|
|
|
|
/// `Default` mirrors `new()` so the cache works with `Default::default()`
/// and struct-update syntax.
impl Default for ParentMetadataCache {
    fn default() -> Self {
        Self::new()
    }
}
|
|
|
|
impl ParentMetadataCache {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
cache: HashMap::new(),
|
|
}
|
|
}
|
|
|
|
pub fn get_or_fetch(
|
|
&mut self,
|
|
conn: &Connection,
|
|
noteable_type: &str,
|
|
parent_id: i64,
|
|
project_path: &str,
|
|
) -> Result<Option<&ParentMetadata>> {
|
|
let key = (noteable_type.to_string(), parent_id);
|
|
if !self.cache.contains_key(&key) {
|
|
let meta = fetch_parent_metadata(conn, noteable_type, parent_id, project_path)?;
|
|
self.cache.insert(key.clone(), meta);
|
|
}
|
|
Ok(self.cache.get(&key).and_then(|m| m.as_ref()))
|
|
}
|
|
}
|
|
|
|
/// Looks up iid/title/url and labels for a note's parent issue or merge
/// request. Returns `Ok(None)` when the parent row does not exist or the
/// noteable type is neither "Issue" nor "MergeRequest".
///
/// `project_path` is passed through unchanged into the result so callers
/// don't need a second project lookup.
///
/// # Errors
/// Propagates any database error other than "no rows".
fn fetch_parent_metadata(
    conn: &Connection,
    noteable_type: &str,
    parent_id: i64,
    project_path: &str,
) -> Result<Option<ParentMetadata>> {
    match noteable_type {
        "Issue" => {
            let parent = conn.query_row(
                "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
                rusqlite::params![parent_id],
                |row| {
                    Ok((
                        row.get::<_, i64>(0)?,
                        row.get::<_, Option<String>>(1)?,
                        row.get::<_, Option<String>>(2)?,
                    ))
                },
            );
            let (iid, title, web_url) = match parent {
                Ok(r) => r,
                // Missing parent: cacheable negative result, not an error.
                Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
                Err(e) => return Err(e.into()),
            };
            // Labels ordered by name for deterministic output.
            let mut label_stmt = conn.prepare_cached(
                "SELECT l.name FROM issue_labels il
                 JOIN labels l ON l.id = il.label_id
                 WHERE il.issue_id = ?1
                 ORDER BY l.name",
            )?;
            let labels: Vec<String> = label_stmt
                .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                .collect::<std::result::Result<Vec<_>, _>>()?;
            Ok(Some(ParentMetadata {
                iid,
                title,
                web_url,
                labels,
                project_path: project_path.to_string(),
            }))
        }
        "MergeRequest" => {
            let parent = conn.query_row(
                "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
                rusqlite::params![parent_id],
                |row| {
                    Ok((
                        row.get::<_, i64>(0)?,
                        row.get::<_, Option<String>>(1)?,
                        row.get::<_, Option<String>>(2)?,
                    ))
                },
            );
            let (iid, title, web_url) = match parent {
                Ok(r) => r,
                Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
                Err(e) => return Err(e.into()),
            };
            let mut label_stmt = conn.prepare_cached(
                "SELECT l.name FROM mr_labels ml
                 JOIN labels l ON l.id = ml.label_id
                 WHERE ml.merge_request_id = ?1
                 ORDER BY l.name",
            )?;
            let labels: Vec<String> = label_stmt
                .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                .collect::<std::result::Result<Vec<_>, _>>()?;
            Ok(Some(ParentMetadata {
                iid,
                title,
                web_url,
                labels,
                project_path: project_path.to_string(),
            }))
        }
        // Unsupported noteable types (e.g. commits/snippets) are skipped.
        _ => Ok(None),
    }
}
|
|
|
|
pub fn extract_note_document_cached(
|
|
conn: &Connection,
|
|
note_id: i64,
|
|
cache: &mut ParentMetadataCache,
|
|
) -> Result<Option<DocumentData>> {
|
|
let row = conn.query_row(
|
|
"SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
|
|
n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
|
|
n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
|
|
d.noteable_type, d.issue_id, d.merge_request_id,
|
|
p.path_with_namespace, p.id AS project_id
|
|
FROM notes n
|
|
JOIN discussions d ON n.discussion_id = d.id
|
|
JOIN projects p ON n.project_id = p.id
|
|
WHERE n.id = ?1",
|
|
rusqlite::params![note_id],
|
|
|row| {
|
|
Ok((
|
|
row.get::<_, i64>(0)?,
|
|
row.get::<_, i64>(1)?,
|
|
row.get::<_, Option<String>>(2)?,
|
|
row.get::<_, Option<String>>(3)?,
|
|
row.get::<_, Option<String>>(4)?,
|
|
row.get::<_, bool>(5)?,
|
|
row.get::<_, i64>(6)?,
|
|
row.get::<_, i64>(7)?,
|
|
row.get::<_, Option<String>>(8)?,
|
|
row.get::<_, Option<i64>>(9)?,
|
|
row.get::<_, Option<String>>(10)?,
|
|
row.get::<_, Option<i64>>(11)?,
|
|
row.get::<_, bool>(12)?,
|
|
row.get::<_, bool>(13)?,
|
|
row.get::<_, Option<String>>(14)?,
|
|
row.get::<_, String>(15)?,
|
|
row.get::<_, Option<i64>>(16)?,
|
|
row.get::<_, Option<i64>>(17)?,
|
|
row.get::<_, String>(18)?,
|
|
row.get::<_, i64>(19)?,
|
|
))
|
|
},
|
|
);
|
|
|
|
let (
|
|
_id,
|
|
gitlab_id,
|
|
author_username,
|
|
body,
|
|
note_type,
|
|
is_system,
|
|
created_at,
|
|
updated_at,
|
|
position_new_path,
|
|
position_new_line,
|
|
position_old_path,
|
|
_position_old_line,
|
|
resolvable,
|
|
resolved,
|
|
_resolved_by,
|
|
noteable_type,
|
|
issue_id,
|
|
merge_request_id,
|
|
path_with_namespace,
|
|
project_id,
|
|
) = match row {
|
|
Ok(r) => r,
|
|
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
|
|
Err(e) => return Err(e.into()),
|
|
};
|
|
|
|
if is_system {
|
|
return Ok(None);
|
|
}
|
|
|
|
let parent_id = match noteable_type.as_str() {
|
|
"Issue" => match issue_id {
|
|
Some(pid) => pid,
|
|
None => return Ok(None),
|
|
},
|
|
"MergeRequest" => match merge_request_id {
|
|
Some(pid) => pid,
|
|
None => return Ok(None),
|
|
},
|
|
_ => return Ok(None),
|
|
};
|
|
|
|
let parent = cache.get_or_fetch(conn, ¬eable_type, parent_id, &path_with_namespace)?;
|
|
let parent = match parent {
|
|
Some(p) => p,
|
|
None => return Ok(None),
|
|
};
|
|
|
|
let parent_iid = parent.iid;
|
|
let parent_title = parent.title.as_deref();
|
|
let parent_web_url = parent.web_url.as_deref();
|
|
let labels = parent.labels.clone();
|
|
let parent_type_label = noteable_type.as_str();
|
|
|
|
build_note_document(
|
|
note_id,
|
|
gitlab_id,
|
|
author_username,
|
|
body,
|
|
note_type,
|
|
created_at,
|
|
updated_at,
|
|
position_new_path,
|
|
position_new_line,
|
|
position_old_path,
|
|
resolvable,
|
|
resolved,
|
|
parent_iid,
|
|
parent_title,
|
|
parent_web_url,
|
|
&labels,
|
|
parent_type_label,
|
|
&path_with_namespace,
|
|
project_id,
|
|
)
|
|
}
|
|
|
|
/// Renders a single note into a `DocumentData`, shared by the cached and
/// uncached extraction paths.
///
/// The document is a key/value header (ids, project, parent, author,
/// timestamps, optional resolution state, diff position, labels, URL)
/// followed by the note body, then hard-capped and hashed.
///
/// # Errors
/// Infallible in practice today (always returns `Ok(Some(..))`); the
/// `Result<Option<..>>` shape matches the extraction functions that call it.
#[allow(clippy::too_many_arguments)]
fn build_note_document(
    note_id: i64,
    gitlab_id: i64,
    author_username: Option<String>,
    body: Option<String>,
    note_type: Option<String>,
    created_at: i64,
    updated_at: i64,
    position_new_path: Option<String>,
    position_new_line: Option<i64>,
    position_old_path: Option<String>,
    resolvable: bool,
    resolved: bool,
    parent_iid: i64,
    parent_title: Option<&str>,
    parent_web_url: Option<&str>,
    labels: &[String],
    parent_type_label: &str,
    path_with_namespace: &str,
    project_id: i64,
) -> Result<Option<DocumentData>> {
    // Distinct non-empty diff paths; BTreeSet gives a sorted, deduplicated list.
    let mut path_set = BTreeSet::new();
    if let Some(ref p) = position_old_path
        && !p.is_empty()
    {
        path_set.insert(p.clone());
    }
    if let Some(ref p) = position_new_path
        && !p.is_empty()
    {
        path_set.insert(p.clone());
    }
    let paths: Vec<String> = path_set.into_iter().collect();

    // Deep-link to the note anchor on the parent page.
    let url = parent_web_url.map(|wu| format!("{}#note_{}", wu, gitlab_id));

    let display_title = parent_title.unwrap_or("(untitled)");
    let display_note_type = note_type.as_deref().unwrap_or("Note");
    let display_author = author_username.as_deref().unwrap_or("unknown");
    let parent_prefix = if parent_type_label == "Issue" {
        format!("Issue #{}", parent_iid)
    } else {
        format!("MR !{}", parent_iid)
    };

    let title = format!(
        "Note by @{} on {}: {}",
        display_author, parent_prefix, display_title
    );

    let labels_csv = labels.join(", ");

    // Header: one "key: value" line per field; `let _ =` because writing
    // to a String cannot fail.
    let mut content = String::new();
    let _ = writeln!(content, "[[Note]]");
    let _ = writeln!(content, "source_type: note");
    let _ = writeln!(content, "note_gitlab_id: {}", gitlab_id);
    let _ = writeln!(content, "project: {}", path_with_namespace);
    let _ = writeln!(content, "parent_type: {}", parent_type_label);
    let _ = writeln!(content, "parent_iid: {}", parent_iid);
    let _ = writeln!(content, "parent_title: {}", display_title);
    let _ = writeln!(content, "note_type: {}", display_note_type);
    let _ = writeln!(content, "author: @{}", display_author);
    let _ = writeln!(content, "created_at: {}", ms_to_iso(created_at));
    // Resolution state only makes sense for resolvable notes.
    if resolvable {
        let _ = writeln!(content, "resolved: {}", resolved);
    }
    // Diff notes get a "path: file[:line]" line for the anchored location.
    if display_note_type == "DiffNote"
        && let Some(ref p) = position_new_path
    {
        if let Some(line) = position_new_line {
            let _ = writeln!(content, "path: {}:{}", p, line);
        } else {
            let _ = writeln!(content, "path: {}", p);
        }
    }
    if !labels.is_empty() {
        let _ = writeln!(content, "labels: {}", labels_csv);
    }
    if let Some(ref u) = url {
        let _ = writeln!(content, "url: {}", u);
    }

    content.push_str("\n--- Body ---\n\n");
    content.push_str(body.as_deref().unwrap_or(""));

    let labels_hash = compute_list_hash(labels);
    let paths_hash = compute_list_hash(&paths);

    // Enforce the global size cap, then hash the (possibly truncated) text.
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);

    Ok(Some(DocumentData {
        source_type: SourceType::Note,
        source_id: note_id,
        project_id,
        author_username,
        labels: labels.to_vec(),
        paths,
        labels_hash,
        paths_hash,
        created_at,
        updated_at,
        url,
        title: Some(title),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()),
    }))
}
|
|
|
|
// Unit tests live in a companion file to keep this module scannable;
// `use super::*;` there still reaches this module's private items.
#[cfg(test)]
#[path = "extractor_tests.rs"]
mod tests;
|