gitlore/src/documents/extractor/issues.rs

/// Builds the indexable document for a single issue.
///
/// The issue is looked up by its row id (joined with its project), its label
/// names are loaded ordered by name, and a text body is rendered: a header
/// (`[[Issue]]` line, project path, optional URL, labels as JSON, state,
/// optional author) followed by the description section when a description
/// is present.
///
/// # Returns
///
/// * `Ok(Some(_))` with the assembled [`DocumentData`] when the issue exists.
/// * `Ok(None)` when no row matches `issue_id`.
///
/// # Errors
///
/// Any database error other than "no rows" from the issue lookup, and any
/// error raised while preparing or reading the label query, is propagated.
pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option<DocumentData>> {
    /// Column values of the issue/project join, in SELECT order.
    struct IssueRow {
        id: i64,
        iid: i64,
        title: Option<String>,
        description: Option<String>,
        state: String,
        author_username: Option<String>,
        created_at: i64,
        updated_at: i64,
        web_url: Option<String>,
        path_with_namespace: String,
        project_id: i64,
    }

    let fetched = conn.query_row(
        "SELECT i.id, i.iid, i.title, i.description, i.state, i.author_username,
                i.created_at, i.updated_at, i.web_url,
                p.path_with_namespace, p.id AS project_id
         FROM issues i
         JOIN projects p ON p.id = i.project_id
         WHERE i.id = ?1",
        rusqlite::params![issue_id],
        |r| {
            // Field types drive the column decoding; indices follow the SELECT list.
            Ok(IssueRow {
                id: r.get(0)?,
                iid: r.get(1)?,
                title: r.get(2)?,
                description: r.get(3)?,
                state: r.get(4)?,
                author_username: r.get(5)?,
                created_at: r.get(6)?,
                updated_at: r.get(7)?,
                web_url: r.get(8)?,
                path_with_namespace: r.get(9)?,
                project_id: r.get(10)?,
            })
        },
    );

    // A missing issue is an expected outcome, not a failure.
    let issue = match fetched {
        Ok(found) => found,
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(other) => return Err(other.into()),
    };
    let iid = issue.iid;

    let mut label_stmt = conn.prepare_cached(
        "SELECT l.name FROM issue_labels il
         JOIN labels l ON l.id = il.label_id
         WHERE il.issue_id = ?1
         ORDER BY l.name",
    )?;
    let mut labels: Vec<String> = Vec::new();
    for name in label_stmt.query_map(rusqlite::params![issue.id], |r| r.get::<_, String>(0))? {
        // Stop at the first row that fails to decode.
        labels.push(name?);
    }

    // Fall back to an empty JSON array if serialization fails.
    let labels_json = match serde_json::to_string(&labels) {
        Ok(json) => json,
        Err(_) => String::from("[]"),
    };

    let display_title = issue.title.as_deref().unwrap_or("(untitled)");
    let embed_title = normalize_title_for_embedding(display_title);

    // Header section. Results of writing into a `String` are intentionally ignored.
    let mut content = String::new();
    let _ = writeln!(content, "[[Issue]] #{}: {}", iid, embed_title);
    let _ = writeln!(content, "Project: {}", issue.path_with_namespace);
    if let Some(url) = &issue.web_url {
        let _ = writeln!(content, "URL: {}", url);
    }
    let _ = writeln!(content, "Labels: {}", labels_json);
    let _ = writeln!(content, "State: {}", issue.state);
    if let Some(author) = &issue.author_username {
        let _ = writeln!(content, "Author: @{}", author);
    }

    // Description section, only when the issue has one.
    if let Some(desc) = &issue.description {
        content.push_str("\n--- Description ---\n\n");
        // Cut oversized descriptions down before appending them to the body.
        let pre_trunc = pre_truncate_description(desc, MAX_DOCUMENT_BYTES_HARD);
        if pre_trunc.was_truncated {
            warn!(
                iid,
                original_bytes = pre_trunc.original_bytes,
                "Issue description truncated (oversized)"
            );
        }
        content.push_str(&pre_trunc.content);
    }

    let labels_hash = compute_list_hash(&labels);
    // Issues carry no paths, so the paths hash is that of an empty list.
    let paths_hash = compute_list_hash(&[]);

    // The content hash is taken over the capped text, i.e. what is actually stored.
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);
    let truncated_reason = hard_cap.reason.map(|r| r.as_str().to_string());

    let document = DocumentData {
        source_type: SourceType::Issue,
        source_id: issue.id,
        project_id: issue.project_id,
        author_username: issue.author_username,
        labels,
        paths: Vec::new(),
        labels_hash,
        paths_hash,
        created_at: issue.created_at,
        updated_at: issue.updated_at,
        url: issue.web_url,
        title: Some(display_title.to_string()),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason,
    };
    Ok(Some(document))
}