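//! Performance benchmarks for optimization verification.
//! Run with: cargo test --release --test perf_benchmark -- --nocapture
//! (`--release` matters: timings from an unoptimized debug build are not representative.)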

use rusqlite::Connection;
use std::time::Instant;

/// Opens a fresh in-memory SQLite database and creates the tables the
/// benchmarks in this file operate on, seeding one `projects` row (id 1).
///
/// Panics if the connection cannot be opened or the schema batch fails.
///
/// NOTE(review): SQLite documents that in-memory databases only support the
/// MEMORY/OFF journal modes, so the WAL pragma is presumably a no-op here;
/// confirm before relying on WAL behaviour in these benchmarks.
fn setup_db() -> Connection {
    const SCHEMA: &str = "
        PRAGMA journal_mode = WAL;
        PRAGMA synchronous = NORMAL;

        CREATE TABLE projects (
            id INTEGER PRIMARY KEY,
            gitlab_project_id INTEGER UNIQUE NOT NULL,
            path_with_namespace TEXT NOT NULL,
            default_branch TEXT,
            web_url TEXT,
            created_at INTEGER,
            updated_at INTEGER,
            raw_payload_id INTEGER
        );
        INSERT INTO projects (id, gitlab_project_id, path_with_namespace)
            VALUES (1, 100, 'group/project');

        CREATE TABLE issues (
            id INTEGER PRIMARY KEY,
            gitlab_id INTEGER UNIQUE NOT NULL,
            project_id INTEGER NOT NULL REFERENCES projects(id),
            iid INTEGER NOT NULL,
            title TEXT,
            description TEXT,
            state TEXT NOT NULL,
            author_username TEXT,
            created_at INTEGER NOT NULL,
            updated_at INTEGER NOT NULL,
            last_seen_at INTEGER NOT NULL,
            discussions_synced_for_updated_at INTEGER,
            resource_events_synced_for_updated_at INTEGER,
            web_url TEXT,
            raw_payload_id INTEGER
        );
        CREATE TABLE labels (
            id INTEGER PRIMARY KEY,
            gitlab_id INTEGER,
            project_id INTEGER NOT NULL REFERENCES projects(id),
            name TEXT NOT NULL,
            color TEXT,
            description TEXT
        );
        CREATE TABLE issue_labels (
            issue_id INTEGER NOT NULL REFERENCES issues(id),
            label_id INTEGER NOT NULL REFERENCES labels(id),
            PRIMARY KEY(issue_id, label_id)
        );

        CREATE TABLE documents (
            id INTEGER PRIMARY KEY,
            source_type TEXT NOT NULL,
            source_id INTEGER NOT NULL,
            project_id INTEGER NOT NULL,
            author_username TEXT,
            label_names TEXT,
            created_at INTEGER,
            updated_at INTEGER,
            url TEXT,
            title TEXT,
            content_text TEXT NOT NULL,
            content_hash TEXT NOT NULL,
            labels_hash TEXT NOT NULL DEFAULT '',
            paths_hash TEXT NOT NULL DEFAULT '',
            is_truncated INTEGER NOT NULL DEFAULT 0,
            truncated_reason TEXT,
            UNIQUE(source_type, source_id)
        );
        CREATE TABLE document_labels (
            document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
            label_name TEXT NOT NULL,
            PRIMARY KEY(document_id, label_name)
        );
        CREATE TABLE document_paths (
            document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
            path TEXT NOT NULL,
            PRIMARY KEY(document_id, path)
        );
        CREATE TABLE dirty_sources (
            source_type TEXT NOT NULL,
            source_id INTEGER NOT NULL,
            queued_at INTEGER NOT NULL,
            attempt_count INTEGER NOT NULL DEFAULT 0,
            last_attempt_at INTEGER,
            last_error TEXT,
            next_attempt_at INTEGER,
            PRIMARY KEY(source_type, source_id)
        );
        CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at);
    ";

    let db = Connection::open_in_memory().unwrap();
    db.execute_batch(SCHEMA).unwrap();
    db
}

/// Baseline ("old") strategy: clear the document's labels, then run one
/// single-row `INSERT` per label.
///
/// Panics if any statement fails.
fn insert_labels_individual(conn: &Connection, doc_id: i64, labels: &[&str]) {
    const CLEAR_SQL: &str = "DELETE FROM document_labels WHERE document_id = ?1";
    const INSERT_SQL: &str =
        "INSERT INTO document_labels (document_id, label_name) VALUES (?1, ?2)";

    conn.execute(CLEAR_SQL, [doc_id]).unwrap();

    // One statement execution per label is the cost this baseline exists to show.
    labels.iter().for_each(|name| {
        conn.execute(INSERT_SQL, rusqlite::params![doc_id, name])
            .unwrap();
    });
}

/// Batched ("new") strategy: clear the document's labels, then insert all of
/// them with a single multi-row `INSERT`.
///
/// `?1` is bound to `doc_id` and the label at index `i` is bound to `?{i + 2}`.
/// Only the `DELETE` runs when `labels` is empty. Panics if a statement fails.
fn insert_labels_batch(conn: &Connection, doc_id: i64, labels: &[&str]) {
    conn.execute(
        "DELETE FROM document_labels WHERE document_id = ?1",
        [doc_id],
    )
    .unwrap();

    // With no labels there is no VALUES row to build, so stop after the delete.
    if labels.is_empty() {
        return;
    }

    // One "(?1, ?N)" tuple per label, with N starting at 2.
    let value_rows = (2..labels.len() + 2)
        .map(|slot| format!("(?1, ?{})", slot))
        .collect::<Vec<_>>()
        .join(", ");
    let statement = format!(
        "INSERT INTO document_labels (document_id, label_name) VALUES {}",
        value_rows
    );

    // Own the heterogeneous parameters first, then borrow them as trait objects.
    let mut owned: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(doc_id)];
    for &name in labels {
        owned.push(Box::new(name));
    }
    let bound: Vec<&dyn rusqlite::types::ToSql> = owned.iter().map(|boxed| &**boxed).collect();

    conn.execute(&statement, bound.as_slice()).unwrap();
}

/// Baseline ("old") string building: every line after the header is rendered
/// into a temporary `String` with `format!` and then appended.
///
/// Returns the header (`[[Issue]] #iid: title` and `Project:`) followed by the
/// `URL`, `Labels`, `State` and `Author` lines, each newline-terminated.
fn build_content_old(
    iid: i64,
    title: &str,
    project: &str,
    labels: &str,
    state: &str,
    author: &str,
    url: &str,
) -> String {
    let mut text = format!("[[Issue]] #{}: {}\nProject: {}\n", iid, title, project);
    // The intermediate allocations below are intentional: they are what this
    // baseline measures.
    text += &format!("URL: {}\n", url);
    text += &format!("Labels: {}\n", labels);
    text += &format!("State: {}\n", state);
    text += &format!("Author: @{}\n", author);
    text
}

/// Optimized ("new") string building: every line after the header is written
/// straight into the output with `writeln!`, without a temporary `String`.
///
/// Returns the same text as `build_content_old` for the same arguments.
fn build_content_new(
    iid: i64,
    title: &str,
    project: &str,
    labels: &str,
    state: &str,
    author: &str,
    url: &str,
) -> String {
    use std::fmt::Write as _;

    let mut text = format!("[[Issue]] #{}: {}\nProject: {}\n", iid, title, project);
    let fields = [
        ("URL: ", url),
        ("Labels: ", labels),
        ("State: ", state),
        ("Author: @", author),
    ];
    for (prefix, value) in fields {
        // The write result is discarded, exactly as in the code being simulated.
        let _ = writeln!(text, "{}{}", prefix, value);
    }
    text
}

/// Label sets of varying size (2 to 6 labels) that the label-insert benchmark
/// cycles through, so consecutive iterations replace a document's labels with
/// a different set.
const LABEL_SETS: &[&[&str]] = &[
    &["bug", "critical", "backend", "needs-review", "p1"],
    &["feature", "frontend", "design", "ux"],
    &["bug", "database", "performance"],
    &["docs", "api"],
    &[
        "infrastructure",
        "ci-cd",
        "devops",
        "monitoring",
        "alerting",
        "sre",
    ],
];

/// Benchmark: per-label `INSERT` statements vs. a single multi-row `INSERT`.
///
/// Replaces the labels of one document `iterations` times with each strategy,
/// prints both durations and the individual/batch ratio, then verifies that
/// each strategy stores exactly the labels it was given.
#[test]
fn bench_label_insert_individual_vs_batch() {
    let conn = setup_db();

    // Create a document to attach labels to
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
         VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 1000, 2000, 3000)",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO documents (id, source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash)
         VALUES (1, 'issue', 1, 1, 'test content', 'hash1', 'lhash1', 'phash1')",
        [],
    ).unwrap();

    let iterations = 5000;

    // Warm up
    for labels in LABEL_SETS {
        insert_labels_individual(&conn, 1, labels);
        insert_labels_batch(&conn, 1, labels);
    }

    // Benchmark INDIVIDUAL inserts
    let start = Instant::now();
    for i in 0..iterations {
        let labels = LABEL_SETS[i % LABEL_SETS.len()];
        insert_labels_individual(&conn, 1, labels);
    }
    let individual_elapsed = start.elapsed();

    // Benchmark BATCH inserts
    let start = Instant::now();
    for i in 0..iterations {
        let labels = LABEL_SETS[i % LABEL_SETS.len()];
        insert_labels_batch(&conn, 1, labels);
    }
    let batch_elapsed = start.elapsed();

    let speedup = individual_elapsed.as_nanos() as f64 / batch_elapsed.as_nanos() as f64;

    println!(
        "\n=== Label INSERT Benchmark ({} iterations) ===",
        iterations
    );
    println!("Individual INSERTs: {:?}", individual_elapsed);
    println!("Batch INSERT:       {:?}", batch_elapsed);
    println!("Speedup:            {:.2}x", speedup);
    println!();

    // Reads back the labels currently stored for document 1, sorted by name.
    let stored_labels = || -> Vec<String> {
        let names: Vec<String> = conn
            .prepare("SELECT label_name FROM document_labels WHERE document_id = 1 ORDER BY label_name")
            .unwrap()
            .query_map([], |row| row.get(0))
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        names
    };

    // Verify correctness: each approach must store exactly the requested
    // labels. Comparing the two results only with each other would also pass
    // if both were wrong in the same way (for example, both empty).
    let expected = ["a", "b", "c"];

    insert_labels_individual(&conn, 1, &expected);
    let individual_labels = stored_labels();
    assert_eq!(
        individual_labels, expected,
        "Individual inserts must store exactly the requested labels"
    );

    insert_labels_batch(&conn, 1, &expected);
    let batch_labels = stored_labels();
    assert_eq!(
        batch_labels, expected,
        "Batch insert must store exactly the requested labels"
    );

    assert_eq!(
        individual_labels, batch_labels,
        "Both approaches must produce identical results"
    );
}

/// Benchmark: `build_content_old` (format! + push_str) vs. `build_content_new`
/// (writeln!).
///
/// Warms both builders up, times 50,000 calls of each on identical inputs,
/// prints the durations and their ratio, and finally asserts that the two
/// builders return the same string for the same input.
#[test]
fn bench_string_building_old_vs_new() {
    const PROJECT: &str = "mygroup/myproject";
    const STATE: &str = "opened";
    const AUTHOR: &str = "alice";
    const URL: &str = "https://gitlab.example.com/mygroup/myproject/-/issues/42";

    /// Calls `build` once per iteration on the timed inputs and returns the
    /// total elapsed time. Each result goes through `black_box` so the call
    /// cannot be optimized away.
    fn time_builder<F>(iterations: i64, build: F) -> std::time::Duration
    where
        F: Fn(i64, &str, &str, &str, &str, &str, &str) -> String,
    {
        let started = Instant::now();
        for iid in 0..iterations {
            let built = build(
                iid,
                "Fix authentication bug in login flow with extended description",
                PROJECT,
                "[\"bug\",\"auth\",\"critical\",\"backend\",\"needs-review\"]",
                STATE,
                AUTHOR,
                URL,
            );
            std::hint::black_box(built);
        }
        started.elapsed()
    }

    let iterations = 50_000;

    // Warm up both builders before anything is timed.
    for _ in 0..100 {
        let _ = build_content_old(
            42,
            "Fix authentication bug in login flow",
            PROJECT,
            "[\"bug\",\"auth\",\"critical\"]",
            STATE,
            AUTHOR,
            URL,
        );
        let _ = build_content_new(
            42,
            "Fix authentication bug in login flow",
            PROJECT,
            "[\"bug\",\"auth\",\"critical\"]",
            STATE,
            AUTHOR,
            URL,
        );
    }

    // Time OLD first, then NEW.
    let old_elapsed = time_builder(iterations, build_content_old);
    let new_elapsed = time_builder(iterations, build_content_new);

    let speedup = old_elapsed.as_nanos() as f64 / new_elapsed.as_nanos() as f64;

    println!(
        "\n=== String Building Benchmark ({} iterations) ===",
        iterations
    );
    println!("format!+push_str: {:?}", old_elapsed);
    println!("writeln!:         {:?}", new_elapsed);
    println!("Speedup:          {:.2}x", speedup);
    println!();

    // Verify correctness: both builders must agree on the same input.
    let sample = |build: fn(i64, &str, &str, &str, &str, &str, &str) -> String| {
        build(
            42,
            "Test",
            "group/proj",
            "[\"bug\"]",
            "opened",
            "alice",
            "https://example.com",
        )
    };
    let old = sample(build_content_old);
    let new = sample(build_content_new);
    assert_eq!(old, new, "Both approaches must produce identical strings");
}

/// Benchmark: `Connection::prepare` vs. `Connection::prepare_cached` for a
/// repeated single-row lookup.
///
/// Seeds 100 issue/document pairs, runs the same lookup 10,000 times with
/// each preparation method, and prints both durations and their ratio.
#[test]
fn bench_prepare_vs_prepare_cached() {
    let conn = setup_db();

    // Seed 100 issues, each with one matching document row.
    for n in 1..=100 {
        conn.execute(
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
             VALUES (?1, ?2, 1, ?1, 'Test', 'opened', 1000, 2000, 3000)",
            rusqlite::params![n, n * 10],
        ).unwrap();
        conn.execute(
            "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash)
             VALUES ('issue', ?1, 1, 'content', ?2, 'lh', 'ph')",
            rusqlite::params![n, format!("hash_{}", n)],
        ).unwrap();
    }

    let iterations: i64 = 10_000;
    let sql = "SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2";

    // Uncached: the statement is prepared again on every iteration.
    let started = Instant::now();
    for source_id in (0..iterations).map(|i| i % 100 + 1) {
        let mut stmt = conn.prepare(sql).unwrap();
        let _hash: Option<String> = stmt
            .query_row(rusqlite::params!["issue", source_id], |row| row.get(0))
            .ok();
    }
    let uncached_elapsed = started.elapsed();

    // Cached: the statement is obtained through `prepare_cached` instead.
    let started = Instant::now();
    for source_id in (0..iterations).map(|i| i % 100 + 1) {
        let mut stmt = conn.prepare_cached(sql).unwrap();
        let _hash: Option<String> = stmt
            .query_row(rusqlite::params!["issue", source_id], |row| row.get(0))
            .ok();
    }
    let cached_elapsed = started.elapsed();

    let speedup = uncached_elapsed.as_nanos() as f64 / cached_elapsed.as_nanos() as f64;

    println!(
        "\n=== prepare vs prepare_cached Benchmark ({} iterations) ===",
        iterations
    );
    println!("prepare():        {:?}", uncached_elapsed);
    println!("prepare_cached(): {:?}", cached_elapsed);
    println!("Speedup:          {:.2}x", speedup);
    println!();
}

/// Benchmark: redundant hash query elimination in document regeneration.
/// OLD: get_existing_hash (1 query) + upsert_document_inner (1 query) = 2 queries per doc
/// NEW: upsert_document_inner only (1 query) = 1 query per doc
///
/// Both timed loops run the same change-detection comparison against hashes
/// computed before timing starts, so the extra query is the only difference
/// being measured. After timing, the test asserts that no lookup reported a
/// change, which would otherwise indicate a missing row or a failed query
/// hidden by `.ok()`.
#[test]
fn bench_redundant_hash_query_elimination() {
    let conn = setup_db();

    // Seed documents
    for i in 1..=100 {
        conn.execute(
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
             VALUES (?1, ?2, 1, ?1, 'Test', 'opened', 1000, 2000, 3000)",
            rusqlite::params![i, i * 10],
        ).unwrap();
        conn.execute(
            "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash)
             VALUES ('issue', ?1, 1, 'content', ?2, 'lh', 'ph')",
            rusqlite::params![i, format!("hash_{}", i)],
        ).unwrap();
    }

    // Hash each seeded row is expected to hold, indexed by `source_id - 1`.
    // Built up front so neither timed loop pays for a `format!` allocation.
    let expected_hashes: Vec<String> = (1..=100).map(|id| format!("hash_{}", id)).collect();

    let iterations = 10_000;
    let hash_sql = "SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2";
    let full_sql = "SELECT id, content_hash, labels_hash, paths_hash FROM documents WHERE source_type = ?1 AND source_id = ?2";

    // OLD: 2 queries per document (get_existing_hash + upsert_document_inner)
    let mut old_changed = 0_usize;
    let start = Instant::now();
    for i in 0..iterations {
        let slot = i % expected_hashes.len();
        let source_id = (slot + 1) as i64;
        // Query 1: get_existing_hash
        let mut hash_stmt = conn.prepare_cached(hash_sql).unwrap();
        let hash: Option<String> = hash_stmt
            .query_row(rusqlite::params!["issue", source_id], |row| row.get(0))
            .ok();
        // Query 2: upsert_document_inner
        let mut full_stmt = conn.prepare_cached(full_sql).unwrap();
        let existing: Option<(i64, String, String, String)> = full_stmt
            .query_row(rusqlite::params!["issue", source_id], |row| {
                Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
            })
            .ok();
        // Change detection on the result of the dedicated hash query.
        let changed = match &hash {
            Some(stored_hash) => stored_hash != &expected_hashes[slot],
            None => true,
        };
        old_changed += usize::from(changed);
        std::hint::black_box((hash, existing, changed));
    }
    let old_elapsed = start.elapsed();

    // NEW: 1 query per document (upsert_document_inner returns change info)
    let mut new_changed = 0_usize;
    let start = Instant::now();
    for i in 0..iterations {
        let slot = i % expected_hashes.len();
        let source_id = (slot + 1) as i64;
        // Single query that provides both change detection and upsert data
        let mut stmt = conn.prepare_cached(full_sql).unwrap();
        let existing: Option<(i64, String, String, String)> = stmt
            .query_row(rusqlite::params!["issue", source_id], |row| {
                Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
            })
            .ok();
        let changed = match &existing {
            Some((_, stored_hash, _, _)) => stored_hash != &expected_hashes[slot],
            None => true,
        };
        new_changed += usize::from(changed);
        std::hint::black_box((existing, changed));
    }
    let new_elapsed = start.elapsed();

    let speedup = old_elapsed.as_nanos() as f64 / new_elapsed.as_nanos() as f64;

    println!(
        "\n=== Redundant Hash Query Elimination ({} iterations) ===",
        iterations
    );
    println!("OLD (2 queries): {:?}", old_elapsed);
    println!("NEW (1 query):   {:?}", new_elapsed);
    println!("Speedup:         {:.2}x", speedup);
    println!();

    // Verify correctness: every lookup must have found its seeded row with the
    // expected hash. A missing row or swallowed query error counts as changed.
    assert_eq!(
        old_changed, 0,
        "OLD path must find every seeded document unchanged"
    );
    assert_eq!(
        new_changed, 0,
        "NEW path must find every seeded document unchanged"
    );
}

// NOTE: SHA256 hex formatting (format!("{:x}") vs LUT) was benchmarked at 1.01x.
// The SHA256 computation dominates; hex encoding is negligible. Optimization reverted.

// NOTE: compute_list_hash indirect index sort vs direct &str sort was benchmarked at 1.02x.
// SHA256 dominates here too; the sort strategy is negligible. Optimization reverted.

/// Benchmark: f32-to-bytes conversion, allocating a new vector per call vs.
/// refilling one reusable buffer.
///
/// Uses 100 synthetic 768-element embeddings (3072 bytes each once encoded),
/// converts them 50,000 times with each strategy, prints both durations and
/// their ratio, and checks that the two conversions yield identical bytes.
#[test]
fn bench_embedding_bytes_alloc_vs_reuse() {
    /// Encodes `embedding` as little-endian bytes into a newly allocated vector.
    fn to_bytes_alloc(embedding: &[f32]) -> Vec<u8> {
        let mut out = Vec::with_capacity(embedding.len() * 4);
        for value in embedding {
            out.extend_from_slice(&value.to_le_bytes());
        }
        out
    }

    /// Encodes `embedding` as little-endian bytes into `buf`, replacing its
    /// previous contents.
    fn to_bytes_reuse(embedding: &[f32], buf: &mut Vec<u8>) {
        buf.clear();
        buf.reserve(embedding.len() * 4);
        for value in embedding {
            buf.extend_from_slice(&value.to_le_bytes());
        }
    }

    // Simulate 768-dim embeddings (nomic-embed-text)
    let dims = 768;
    let embeddings: Vec<Vec<f32>> = (0..100)
        .map(|row| {
            let offset = row * dims;
            (0..dims)
                .map(|col| (offset + col) as f32 * 0.001)
                .collect()
        })
        .collect();
    let iterations = 50_000;

    // Warm up both conversions once per embedding.
    let mut warm_buf = Vec::with_capacity(dims * 4);
    for emb in &embeddings {
        let _ = to_bytes_alloc(emb);
        to_bytes_reuse(emb, &mut warm_buf);
    }

    // OLD: a fresh allocation on every call.
    let started = Instant::now();
    for emb in embeddings.iter().cycle().take(iterations) {
        let bytes = to_bytes_alloc(emb);
        std::hint::black_box(&bytes);
    }
    let old_elapsed = started.elapsed();

    // NEW: one buffer, created inside the timed region and refilled per call.
    let started = Instant::now();
    let mut buf = Vec::with_capacity(dims * 4);
    for emb in embeddings.iter().cycle().take(iterations) {
        to_bytes_reuse(emb, &mut buf);
        std::hint::black_box(&buf);
    }
    let new_elapsed = started.elapsed();

    let speedup = old_elapsed.as_nanos() as f64 / new_elapsed.as_nanos() as f64;

    println!(
        "\n=== Embedding Bytes Conversion Benchmark ({} iterations, {} dims) ===",
        iterations, dims
    );
    println!("Alloc per call:   {:?}", old_elapsed);
    println!("Reusable buffer:  {:?}", new_elapsed);
    println!("Speedup:          {:.2}x", speedup);
    println!();

    // Verify correctness: both conversions must agree on a fresh embedding.
    let test_emb: Vec<f32> = (0..dims).map(|i| i as f32 * 0.1).collect();
    let alloc_result = to_bytes_alloc(&test_emb);
    to_bytes_reuse(&test_emb, &mut buf);
    assert_eq!(
        alloc_result, buf,
        "Both approaches must produce identical bytes"
    );
}