gitlore/src/documents/extractor/discussions.rs

pub fn extract_discussion_document(
    conn: &Connection,
    discussion_id: i64,
) -> Result<Option<DocumentData>> {
    let disc_row = conn.query_row(
        "SELECT d.id, d.noteable_type, d.issue_id, d.merge_request_id,
                p.path_with_namespace, p.id AS project_id
         FROM discussions d
         JOIN projects p ON p.id = d.project_id
         WHERE d.id = ?1",
        rusqlite::params![discussion_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, Option<i64>>(2)?,
                row.get::<_, Option<i64>>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, i64>(5)?,
            ))
        },
    );

    let (id, noteable_type, issue_id, merge_request_id, path_with_namespace, project_id) =
        match disc_row {
            Ok(r) => r,
            Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
            Err(e) => return Err(e.into()),
        };

    let (_parent_iid, parent_title, parent_web_url, parent_type_prefix, labels) =
        match noteable_type.as_str() {
            "Issue" => {
                let parent_id = match issue_id {
                    Some(pid) => pid,
                    None => return Ok(None),
                };
                let parent = conn.query_row(
                    "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
                    rusqlite::params![parent_id],
                    |row| {
                        Ok((
                            row.get::<_, i64>(0)?,
                            row.get::<_, Option<String>>(1)?,
                            row.get::<_, Option<String>>(2)?,
                        ))
                    },
                );
                let (iid, title, web_url) = match parent {
                    Ok(r) => r,
                    Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
                    Err(e) => return Err(e.into()),
                };
                let mut label_stmt = conn.prepare_cached(
                    "SELECT l.name FROM issue_labels il
                     JOIN labels l ON l.id = il.label_id
                     WHERE il.issue_id = ?1
                     ORDER BY l.name",
                )?;
                let labels: Vec<String> = label_stmt
                    .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                    .collect::<std::result::Result<Vec<_>, _>>()?;

                (iid, title, web_url, format!("Issue #{}", iid), labels)
            }
            "MergeRequest" => {
                let parent_id = match merge_request_id {
                    Some(pid) => pid,
                    None => return Ok(None),
                };
                let parent = conn.query_row(
                    "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
                    rusqlite::params![parent_id],
                    |row| {
                        Ok((
                            row.get::<_, i64>(0)?,
                            row.get::<_, Option<String>>(1)?,
                            row.get::<_, Option<String>>(2)?,
                        ))
                    },
                );
                let (iid, title, web_url) = match parent {
                    Ok(r) => r,
                    Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
                    Err(e) => return Err(e.into()),
                };
                let mut label_stmt = conn.prepare_cached(
                    "SELECT l.name FROM mr_labels ml
                     JOIN labels l ON l.id = ml.label_id
                     WHERE ml.merge_request_id = ?1
                     ORDER BY l.name",
                )?;
                let labels: Vec<String> = label_stmt
                    .query_map(rusqlite::params![parent_id], |row| row.get(0))?
                    .collect::<std::result::Result<Vec<_>, _>>()?;

                (iid, title, web_url, format!("MR !{}", iid), labels)
            }
            _ => return Ok(None),
        };

    let mut note_stmt = conn.prepare_cached(
        "SELECT n.author_username, n.body, n.created_at, n.gitlab_id,
                n.note_type, n.position_old_path, n.position_new_path
         FROM notes n
         WHERE n.discussion_id = ?1 AND n.is_system = 0
         ORDER BY n.created_at ASC, n.id ASC",
    )?;

    struct NoteRow {
        author: Option<String>,
        body: Option<String>,
        created_at: i64,
        gitlab_id: i64,
        old_path: Option<String>,
        new_path: Option<String>,
    }

    let notes: Vec<NoteRow> = note_stmt
        .query_map(rusqlite::params![id], |row| {
            Ok(NoteRow {
                author: row.get(0)?,
                body: row.get(1)?,
                created_at: row.get(2)?,
                gitlab_id: row.get(3)?,
                old_path: row.get(5)?,
                new_path: row.get(6)?,
            })
        })?
        .collect::<std::result::Result<Vec<_>, _>>()?;

    if notes.is_empty() {
        return Ok(None);
    }

    let mut path_set = BTreeSet::new();
    for note in &notes {
        if let Some(ref p) = note.old_path
            && !p.is_empty()
        {
            path_set.insert(p.clone());
        }
        if let Some(ref p) = note.new_path
            && !p.is_empty()
        {
            path_set.insert(p.clone());
        }
    }
    let paths: Vec<String> = path_set.into_iter().collect();

    let first_note_gitlab_id = notes[0].gitlab_id;
    let url = parent_web_url
        .as_ref()
        .map(|wu| format!("{}#note_{}", wu, first_note_gitlab_id));

    let author_username = notes[0].author.clone();

    let display_title = parent_title.as_deref().unwrap_or("(untitled)");
    let labels_json = serde_json::to_string(&labels).unwrap_or_else(|_| "[]".to_string());
    let paths_json = serde_json::to_string(&paths).unwrap_or_else(|_| "[]".to_string());

    let mut content = format!(
        "[[Discussion]] {}: {}\nProject: {}\n",
        parent_type_prefix, display_title, path_with_namespace
    );
    if let Some(ref u) = url {
        let _ = writeln!(content, "URL: {}", u);
    }
    let _ = writeln!(content, "Labels: {}", labels_json);
    if !paths.is_empty() {
        let _ = writeln!(content, "Files: {}", paths_json);
    }

    let note_contents: Vec<NoteContent> = notes
        .iter()
        .map(|note| NoteContent {
            author: note.author.as_deref().unwrap_or("unknown").to_string(),
            date: format_date(note.created_at),
            body: note.body.as_deref().unwrap_or("").to_string(),
        })
        .collect();

    let header_len = content.len() + "\n--- Thread ---\n\n".len();
    let thread_budget = MAX_DISCUSSION_BYTES.saturating_sub(header_len);

    let thread_result = truncate_discussion(&note_contents, thread_budget);
    content.push_str("\n--- Thread ---\n\n");
    content.push_str(&thread_result.content);

    let created_at = notes[0].created_at;
    let updated_at = notes.last().map(|n| n.created_at).unwrap_or(created_at);

    let content_hash = compute_content_hash(&content);
    let labels_hash = compute_list_hash(&labels);
    let paths_hash = compute_list_hash(&paths);

    Ok(Some(DocumentData {
        source_type: SourceType::Discussion,
        source_id: id,
        project_id,
        author_username,
        labels,
        paths,
        labels_hash,
        paths_hash,
        created_at,
        updated_at,
        url,
        title: None,
        content_text: content,
        content_hash,
        is_truncated: thread_result.is_truncated,
        truncated_reason: thread_result.reason.map(|r| r.as_str().to_string()),
    }))
}