Compare commits
3 Commits
3767c33c28
...
e8845380e9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e8845380e9 | ||
|
|
3e9cf2358e | ||
|
|
16beb35a69 |
@@ -3,6 +3,7 @@ use rusqlite::Connection;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write as _;
|
||||
|
||||
use super::truncation::{
|
||||
MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
|
||||
@@ -143,12 +144,12 @@ pub fn extract_issue_document(conn: &Connection, issue_id: i64) -> Result<Option
|
||||
iid, display_title, path_with_namespace
|
||||
);
|
||||
if let Some(ref url) = web_url {
|
||||
content.push_str(&format!("URL: {}\n", url));
|
||||
let _ = writeln!(content, "URL: {}", url);
|
||||
}
|
||||
content.push_str(&format!("Labels: {}\n", labels_json));
|
||||
content.push_str(&format!("State: {}\n", state));
|
||||
let _ = writeln!(content, "Labels: {}", labels_json);
|
||||
let _ = writeln!(content, "State: {}", state);
|
||||
if let Some(ref author) = author_username {
|
||||
content.push_str(&format!("Author: @{}\n", author));
|
||||
let _ = writeln!(content, "Author: @{}", author);
|
||||
}
|
||||
|
||||
if let Some(ref desc) = description {
|
||||
@@ -250,15 +251,15 @@ pub fn extract_mr_document(conn: &Connection, mr_id: i64) -> Result<Option<Docum
|
||||
iid, display_title, path_with_namespace
|
||||
);
|
||||
if let Some(ref url) = web_url {
|
||||
content.push_str(&format!("URL: {}\n", url));
|
||||
let _ = writeln!(content, "URL: {}", url);
|
||||
}
|
||||
content.push_str(&format!("Labels: {}\n", labels_json));
|
||||
content.push_str(&format!("State: {}\n", display_state));
|
||||
let _ = writeln!(content, "Labels: {}", labels_json);
|
||||
let _ = writeln!(content, "State: {}", display_state);
|
||||
if let Some(ref author) = author_username {
|
||||
content.push_str(&format!("Author: @{}\n", author));
|
||||
let _ = writeln!(content, "Author: @{}", author);
|
||||
}
|
||||
if let (Some(src), Some(tgt)) = (&source_branch, &target_branch) {
|
||||
content.push_str(&format!("Source: {} -> {}\n", src, tgt));
|
||||
let _ = writeln!(content, "Source: {} -> {}", src, tgt);
|
||||
}
|
||||
|
||||
if let Some(ref desc) = description {
|
||||
@@ -464,11 +465,11 @@ pub fn extract_discussion_document(
|
||||
parent_type_prefix, display_title, path_with_namespace
|
||||
);
|
||||
if let Some(ref u) = url {
|
||||
content.push_str(&format!("URL: {}\n", u));
|
||||
let _ = writeln!(content, "URL: {}", u);
|
||||
}
|
||||
content.push_str(&format!("Labels: {}\n", labels_json));
|
||||
let _ = writeln!(content, "Labels: {}", labels_json);
|
||||
if !paths.is_empty() {
|
||||
content.push_str(&format!("Files: {}\n", paths_json));
|
||||
let _ = writeln!(content, "Files: {}", paths_json);
|
||||
}
|
||||
|
||||
let note_contents: Vec<NoteContent> = notes
|
||||
|
||||
@@ -108,8 +108,9 @@ fn get_existing_hash(
|
||||
source_type: SourceType,
|
||||
source_id: i64,
|
||||
) -> Result<Option<String>> {
|
||||
let mut stmt = conn
|
||||
.prepare("SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2")?;
|
||||
let mut stmt = conn.prepare_cached(
|
||||
"SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2",
|
||||
)?;
|
||||
|
||||
let hash: Option<String> = stmt
|
||||
.query_row(rusqlite::params![source_type.as_str(), source_id], |row| {
|
||||
@@ -206,11 +207,25 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
|
||||
"DELETE FROM document_labels WHERE document_id = ?1",
|
||||
[doc_id],
|
||||
)?;
|
||||
if !doc.labels.is_empty() {
|
||||
let placeholders: String = doc
|
||||
.labels
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, _)| format!("(?1, ?{})", i + 2))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
let sql = format!(
|
||||
"INSERT INTO document_labels (document_id, label_name) VALUES {}",
|
||||
placeholders
|
||||
);
|
||||
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(doc_id)];
|
||||
for label in &doc.labels {
|
||||
conn.execute(
|
||||
"INSERT INTO document_labels (document_id, label_name) VALUES (?1, ?2)",
|
||||
rusqlite::params![doc_id, label],
|
||||
)?;
|
||||
params.push(Box::new(label.as_str()));
|
||||
}
|
||||
let param_refs: Vec<&dyn rusqlite::types::ToSql> =
|
||||
params.iter().map(|p| p.as_ref()).collect();
|
||||
conn.execute(&sql, param_refs.as_slice())?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -223,11 +238,25 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<()> {
|
||||
"DELETE FROM document_paths WHERE document_id = ?1",
|
||||
[doc_id],
|
||||
)?;
|
||||
if !doc.paths.is_empty() {
|
||||
let placeholders: String = doc
|
||||
.paths
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, _)| format!("(?1, ?{})", i + 2))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
let sql = format!(
|
||||
"INSERT INTO document_paths (document_id, path) VALUES {}",
|
||||
placeholders
|
||||
);
|
||||
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(doc_id)];
|
||||
for path in &doc.paths {
|
||||
conn.execute(
|
||||
"INSERT INTO document_paths (document_id, path) VALUES (?1, ?2)",
|
||||
rusqlite::params![doc_id, path],
|
||||
)?;
|
||||
params.push(Box::new(path.as_str()));
|
||||
}
|
||||
let param_refs: Vec<&dyn rusqlite::types::ToSql> =
|
||||
params.iter().map(|p| p.as_ref()).collect();
|
||||
conn.execute(&sql, param_refs.as_slice())?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -27,9 +27,9 @@ pub struct OllamaClient {
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct EmbedRequest {
|
||||
model: String,
|
||||
input: Vec<String>,
|
||||
struct EmbedRequest<'a> {
|
||||
model: &'a str,
|
||||
input: Vec<&'a str>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
@@ -101,12 +101,12 @@ impl OllamaClient {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn embed_batch(&self, texts: Vec<String>) -> Result<Vec<Vec<f32>>> {
|
||||
pub async fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>> {
|
||||
let url = format!("{}/api/embed", self.config.base_url);
|
||||
|
||||
let request = EmbedRequest {
|
||||
model: self.config.model.clone(),
|
||||
input: texts,
|
||||
model: &self.config.model,
|
||||
input: texts.to_vec(),
|
||||
};
|
||||
|
||||
let response = self
|
||||
@@ -181,8 +181,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_embed_request_serialization() {
|
||||
let request = EmbedRequest {
|
||||
model: "nomic-embed-text".to_string(),
|
||||
input: vec!["hello".to_string(), "world".to_string()],
|
||||
model: "nomic-embed-text",
|
||||
input: vec!["hello", "world"],
|
||||
};
|
||||
let json = serde_json::to_string(&request).unwrap();
|
||||
assert!(json.contains("\"model\":\"nomic-embed-text\""));
|
||||
|
||||
@@ -162,9 +162,9 @@ async fn embed_page(
|
||||
let mut cleared_docs: HashSet<i64> = HashSet::with_capacity(pending.len());
|
||||
|
||||
for batch in all_chunks.chunks(BATCH_SIZE) {
|
||||
let texts: Vec<String> = batch.iter().map(|c| c.text.clone()).collect();
|
||||
let texts: Vec<&str> = batch.iter().map(|c| c.text.as_str()).collect();
|
||||
|
||||
match client.embed_batch(texts).await {
|
||||
match client.embed_batch(&texts).await {
|
||||
Ok(embeddings) => {
|
||||
for (i, embedding) in embeddings.iter().enumerate() {
|
||||
if i >= batch.len() {
|
||||
@@ -228,7 +228,7 @@ async fn embed_page(
|
||||
if is_context_error && batch.len() > 1 {
|
||||
warn!("Batch failed with context length error, retrying chunks individually");
|
||||
for chunk in batch {
|
||||
match client.embed_batch(vec![chunk.text.clone()]).await {
|
||||
match client.embed_batch(&[chunk.text.as_str()]).await {
|
||||
Ok(embeddings)
|
||||
if !embeddings.is_empty()
|
||||
&& embeddings[0].len() == EXPECTED_DIMS =>
|
||||
|
||||
@@ -67,7 +67,7 @@ pub fn search_fts(
|
||||
LIMIT ?2
|
||||
"#;
|
||||
|
||||
let mut stmt = conn.prepare(sql)?;
|
||||
let mut stmt = conn.prepare_cached(sql)?;
|
||||
let results = stmt
|
||||
.query_map(rusqlite::params![fts_query, limit as i64], |row| {
|
||||
Ok(FtsResult {
|
||||
|
||||
@@ -3,6 +3,7 @@ use rusqlite::Connection;
|
||||
use crate::core::error::Result;
|
||||
use crate::embedding::ollama::OllamaClient;
|
||||
use crate::search::filters::{SearchFilters, apply_filters};
|
||||
use crate::search::rrf::RrfResult;
|
||||
use crate::search::{FtsQueryMode, rank_rrf, search_fts, search_vector};
|
||||
|
||||
const BASE_RECALL_MIN: usize = 50;
|
||||
@@ -77,7 +78,7 @@ pub async fn search_hybrid(
|
||||
));
|
||||
};
|
||||
|
||||
let query_embedding = client.embed_batch(vec![query.to_string()]).await?;
|
||||
let query_embedding = client.embed_batch(&[query]).await?;
|
||||
let embedding = query_embedding.into_iter().next().unwrap_or_default();
|
||||
|
||||
if embedding.is_empty() {
|
||||
@@ -102,7 +103,7 @@ pub async fn search_hybrid(
|
||||
.collect();
|
||||
|
||||
match client {
|
||||
Some(client) => match client.embed_batch(vec![query.to_string()]).await {
|
||||
Some(client) => match client.embed_batch(&[query]).await {
|
||||
Ok(query_embedding) => {
|
||||
let embedding = query_embedding.into_iter().next().unwrap_or_default();
|
||||
|
||||
@@ -137,30 +138,28 @@ pub async fn search_hybrid(
|
||||
};
|
||||
|
||||
let ranked = rank_rrf(&vec_tuples, &fts_tuples);
|
||||
let limit = filters.clamp_limit();
|
||||
|
||||
let results: Vec<HybridResult> = ranked
|
||||
.into_iter()
|
||||
.map(|r| HybridResult {
|
||||
let to_hybrid = |r: RrfResult| HybridResult {
|
||||
document_id: r.document_id,
|
||||
score: r.normalized_score,
|
||||
vector_rank: r.vector_rank,
|
||||
fts_rank: r.fts_rank,
|
||||
rrf_score: r.rrf_score,
|
||||
})
|
||||
.collect();
|
||||
};
|
||||
|
||||
let limit = filters.clamp_limit();
|
||||
let results = if filters.has_any_filter() {
|
||||
let all_ids: Vec<i64> = results.iter().map(|r| r.document_id).collect();
|
||||
let results: Vec<HybridResult> = if filters.has_any_filter() {
|
||||
let all_ids: Vec<i64> = ranked.iter().map(|r| r.document_id).collect();
|
||||
let filtered_ids = apply_filters(conn, &all_ids, filters)?;
|
||||
let filtered_set: std::collections::HashSet<i64> = filtered_ids.iter().copied().collect();
|
||||
results
|
||||
let filtered_set: std::collections::HashSet<i64> = filtered_ids.into_iter().collect();
|
||||
ranked
|
||||
.into_iter()
|
||||
.filter(|r| filtered_set.contains(&r.document_id))
|
||||
.take(limit)
|
||||
.map(to_hybrid)
|
||||
.collect()
|
||||
} else {
|
||||
results.into_iter().take(limit).collect()
|
||||
ranked.into_iter().take(limit).map(to_hybrid).collect()
|
||||
};
|
||||
|
||||
Ok((results, warnings))
|
||||
|
||||
353
tests/perf_benchmark.rs
Normal file
353
tests/perf_benchmark.rs
Normal file
@@ -0,0 +1,353 @@
|
||||
//! Performance benchmarks for optimization verification.
|
||||
//! Run with: cargo test --test perf_benchmark -- --nocapture
|
||||
|
||||
use rusqlite::Connection;
|
||||
use std::time::Instant;
|
||||
|
||||
fn setup_db() -> Connection {
|
||||
let conn = Connection::open_in_memory().unwrap();
|
||||
conn.execute_batch(
|
||||
"
|
||||
PRAGMA journal_mode = WAL;
|
||||
PRAGMA synchronous = NORMAL;
|
||||
|
||||
CREATE TABLE projects (
|
||||
id INTEGER PRIMARY KEY,
|
||||
gitlab_project_id INTEGER UNIQUE NOT NULL,
|
||||
path_with_namespace TEXT NOT NULL,
|
||||
default_branch TEXT,
|
||||
web_url TEXT,
|
||||
created_at INTEGER,
|
||||
updated_at INTEGER,
|
||||
raw_payload_id INTEGER
|
||||
);
|
||||
INSERT INTO projects (id, gitlab_project_id, path_with_namespace)
|
||||
VALUES (1, 100, 'group/project');
|
||||
|
||||
CREATE TABLE issues (
|
||||
id INTEGER PRIMARY KEY,
|
||||
gitlab_id INTEGER UNIQUE NOT NULL,
|
||||
project_id INTEGER NOT NULL REFERENCES projects(id),
|
||||
iid INTEGER NOT NULL,
|
||||
title TEXT,
|
||||
description TEXT,
|
||||
state TEXT NOT NULL,
|
||||
author_username TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
last_seen_at INTEGER NOT NULL,
|
||||
discussions_synced_for_updated_at INTEGER,
|
||||
resource_events_synced_for_updated_at INTEGER,
|
||||
web_url TEXT,
|
||||
raw_payload_id INTEGER
|
||||
);
|
||||
CREATE TABLE labels (
|
||||
id INTEGER PRIMARY KEY,
|
||||
gitlab_id INTEGER,
|
||||
project_id INTEGER NOT NULL REFERENCES projects(id),
|
||||
name TEXT NOT NULL,
|
||||
color TEXT,
|
||||
description TEXT
|
||||
);
|
||||
CREATE TABLE issue_labels (
|
||||
issue_id INTEGER NOT NULL REFERENCES issues(id),
|
||||
label_id INTEGER NOT NULL REFERENCES labels(id),
|
||||
PRIMARY KEY(issue_id, label_id)
|
||||
);
|
||||
|
||||
CREATE TABLE documents (
|
||||
id INTEGER PRIMARY KEY,
|
||||
source_type TEXT NOT NULL,
|
||||
source_id INTEGER NOT NULL,
|
||||
project_id INTEGER NOT NULL,
|
||||
author_username TEXT,
|
||||
label_names TEXT,
|
||||
created_at INTEGER,
|
||||
updated_at INTEGER,
|
||||
url TEXT,
|
||||
title TEXT,
|
||||
content_text TEXT NOT NULL,
|
||||
content_hash TEXT NOT NULL,
|
||||
labels_hash TEXT NOT NULL DEFAULT '',
|
||||
paths_hash TEXT NOT NULL DEFAULT '',
|
||||
is_truncated INTEGER NOT NULL DEFAULT 0,
|
||||
truncated_reason TEXT,
|
||||
UNIQUE(source_type, source_id)
|
||||
);
|
||||
CREATE TABLE document_labels (
|
||||
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
||||
label_name TEXT NOT NULL,
|
||||
PRIMARY KEY(document_id, label_name)
|
||||
);
|
||||
CREATE TABLE document_paths (
|
||||
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
||||
path TEXT NOT NULL,
|
||||
PRIMARY KEY(document_id, path)
|
||||
);
|
||||
CREATE TABLE dirty_sources (
|
||||
source_type TEXT NOT NULL,
|
||||
source_id INTEGER NOT NULL,
|
||||
queued_at INTEGER NOT NULL,
|
||||
attempt_count INTEGER NOT NULL DEFAULT 0,
|
||||
last_attempt_at INTEGER,
|
||||
last_error TEXT,
|
||||
next_attempt_at INTEGER,
|
||||
PRIMARY KEY(source_type, source_id)
|
||||
);
|
||||
CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at);
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
conn
|
||||
}
|
||||
|
||||
/// Simulate the OLD approach: individual INSERT per label
|
||||
fn insert_labels_individual(conn: &Connection, doc_id: i64, labels: &[&str]) {
|
||||
conn.execute(
|
||||
"DELETE FROM document_labels WHERE document_id = ?1",
|
||||
[doc_id],
|
||||
)
|
||||
.unwrap();
|
||||
for label in labels {
|
||||
conn.execute(
|
||||
"INSERT INTO document_labels (document_id, label_name) VALUES (?1, ?2)",
|
||||
rusqlite::params![doc_id, label],
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Simulate the NEW approach: batch INSERT
|
||||
fn insert_labels_batch(conn: &Connection, doc_id: i64, labels: &[&str]) {
|
||||
conn.execute(
|
||||
"DELETE FROM document_labels WHERE document_id = ?1",
|
||||
[doc_id],
|
||||
)
|
||||
.unwrap();
|
||||
if !labels.is_empty() {
|
||||
let placeholders: String = labels
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, _)| format!("(?1, ?{})", i + 2))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
let sql = format!(
|
||||
"INSERT INTO document_labels (document_id, label_name) VALUES {}",
|
||||
placeholders
|
||||
);
|
||||
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(doc_id)];
|
||||
for label in labels {
|
||||
params.push(Box::new(*label));
|
||||
}
|
||||
let param_refs: Vec<&dyn rusqlite::types::ToSql> =
|
||||
params.iter().map(|p| p.as_ref()).collect();
|
||||
conn.execute(&sql, param_refs.as_slice()).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Baseline ("old") header builder: every metadata line is first rendered into
/// a temporary `String` with `format!` and then appended to the result.
///
/// Returns the `[[Issue]]`, `Project:`, `URL:`, `Labels:`, `State:` and
/// `Author:` lines, each terminated by a newline.
fn build_content_old(
    iid: i64,
    title: &str,
    project: &str,
    labels: &str,
    state: &str,
    author: &str,
    url: &str,
) -> String {
    let mut header = format!("[[Issue]] #{}: {}\nProject: {}\n", iid, title, project);

    // `+=` on a `String` appends like `push_str`; the temporary from `format!`
    // is kept deliberately, since that allocation is what the benchmark measures.
    header += &format!("URL: {}\n", url);
    header += &format!("Labels: {}\n", labels);
    header += &format!("State: {}\n", state);
    header += &format!("Author: @{}\n", author);

    header
}
|
||||
|
||||
/// Optimized ("new") header builder: every metadata line is formatted straight
/// into the result with `writeln!`, without an intermediate `String`.
///
/// Returns the `[[Issue]]`, `Project:`, `URL:`, `Labels:`, `State:` and
/// `Author:` lines, each terminated by a newline.
fn build_content_new(
    iid: i64,
    title: &str,
    project: &str,
    labels: &str,
    state: &str,
    author: &str,
    url: &str,
) -> String {
    use std::fmt::Write as _;

    let mut header = format!("[[Issue]] #{}: {}\nProject: {}\n", iid, title, project);

    // The `fmt::Result` of each write is discarded, exactly as before.
    writeln!(header, "URL: {}", url).ok();
    writeln!(header, "Labels: {}", labels).ok();
    writeln!(header, "State: {}", state).ok();
    writeln!(header, "Author: @{}", author).ok();

    header
}
|
||||
|
||||
/// Label sets the insert benchmark cycles through, one set per iteration.
///
/// The sets hold between two and six labels each.
const LABEL_SETS: &[&[&str]] = &[
    &["bug", "critical", "backend", "needs-review", "p1"],
    &["feature", "frontend", "design", "ux"],
    &["bug", "database", "performance"],
    &["docs", "api"],
    &["infrastructure", "ci-cd", "devops", "monitoring", "alerting", "sre"],
];
|
||||
|
||||
/// Times per-label `INSERT`s against a single multi-row `INSERT` and checks
/// that both strategies leave exactly the expected rows behind.
#[test]
fn bench_label_insert_individual_vs_batch() {
    let conn = setup_db();

    // Create a document to attach labels to
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
         VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 1000, 2000, 3000)",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO documents (id, source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash)
         VALUES (1, 'issue', 1, 1, 'test content', 'hash1', 'lhash1', 'phash1')",
        [],
    )
    .unwrap();

    let iterations = 5000;

    // Warm up
    for labels in LABEL_SETS {
        insert_labels_individual(&conn, 1, labels);
        insert_labels_batch(&conn, 1, labels);
    }

    // Benchmark INDIVIDUAL inserts
    let start = Instant::now();
    for i in 0..iterations {
        let labels = LABEL_SETS[i % LABEL_SETS.len()];
        insert_labels_individual(&conn, 1, labels);
    }
    let individual_elapsed = start.elapsed();

    // Benchmark BATCH inserts
    let start = Instant::now();
    for i in 0..iterations {
        let labels = LABEL_SETS[i % LABEL_SETS.len()];
        insert_labels_batch(&conn, 1, labels);
    }
    let batch_elapsed = start.elapsed();

    let speedup = individual_elapsed.as_secs_f64() / batch_elapsed.as_secs_f64();

    println!("\n=== Label INSERT Benchmark ({} iterations) ===", iterations);
    println!("Individual INSERTs: {:?}", individual_elapsed);
    println!("Batch INSERT: {:?}", batch_elapsed);
    println!("Speedup: {:.2}x", speedup);
    println!();

    // Reads back the labels currently stored for document 1, sorted by name.
    let stored_labels = || -> Vec<String> {
        let mut stmt = conn
            .prepare("SELECT label_name FROM document_labels WHERE document_id = 1 ORDER BY label_name")
            .unwrap();
        let names = stmt
            .query_map([], |row| row.get(0))
            .unwrap()
            .collect::<Result<Vec<String>, _>>()
            .unwrap();
        names
    };

    // Verify correctness against the expected rows, not just against each other:
    // comparing the two strategies alone would pass if both were equally broken.
    insert_labels_individual(&conn, 1, &["a", "b", "c"]);
    let individual_labels = stored_labels();
    assert_eq!(
        individual_labels,
        ["a", "b", "c"],
        "Individual INSERTs must store exactly the given labels"
    );

    insert_labels_batch(&conn, 1, &["a", "b", "c"]);
    let batch_labels = stored_labels();

    assert_eq!(individual_labels, batch_labels, "Both approaches must produce identical results");
}
|
||||
|
||||
/// Times `format!` + `push_str` against direct `writeln!` for building the
/// document header and checks that both produce the expected text.
#[test]
fn bench_string_building_old_vs_new() {
    // One shared fixture so both variants are guaranteed to format the same input.
    const TITLE: &str = "Fix authentication bug in login flow with extended description";
    const PROJECT: &str = "mygroup/myproject";
    const LABELS: &str = "[\"bug\",\"auth\",\"critical\",\"backend\",\"needs-review\"]";
    const STATE: &str = "opened";
    const AUTHOR: &str = "alice";
    const URL: &str = "https://gitlab.example.com/mygroup/myproject/-/issues/42";

    // Typed as `i64` so the loop counter can be passed as `iid` without a cast.
    let iterations: i64 = 50_000;

    // Warm up
    for _ in 0..100 {
        std::hint::black_box(build_content_old(42, TITLE, PROJECT, LABELS, STATE, AUTHOR, URL));
        std::hint::black_box(build_content_new(42, TITLE, PROJECT, LABELS, STATE, AUTHOR, URL));
    }

    // Benchmark OLD
    let start = Instant::now();
    for i in 0..iterations {
        let s = build_content_old(i, TITLE, PROJECT, LABELS, STATE, AUTHOR, URL);
        std::hint::black_box(s);
    }
    let old_elapsed = start.elapsed();

    // Benchmark NEW
    let start = Instant::now();
    for i in 0..iterations {
        let s = build_content_new(i, TITLE, PROJECT, LABELS, STATE, AUTHOR, URL);
        std::hint::black_box(s);
    }
    let new_elapsed = start.elapsed();

    let speedup = old_elapsed.as_secs_f64() / new_elapsed.as_secs_f64();

    println!("\n=== String Building Benchmark ({} iterations) ===", iterations);
    println!("format!+push_str: {:?}", old_elapsed);
    println!("writeln!: {:?}", new_elapsed);
    println!("Speedup: {:.2}x", speedup);
    println!();

    // Verify correctness against the expected text, not just against each other:
    // comparing the two builders alone would pass if both were equally wrong.
    let expected = "[[Issue]] #42: Test\nProject: group/proj\nURL: https://example.com\nLabels: [\"bug\"]\nState: opened\nAuthor: @alice\n";
    let old = build_content_old(42, "Test", "group/proj", "[\"bug\"]", "opened", "alice", "https://example.com");
    let new = build_content_new(42, "Test", "group/proj", "[\"bug\"]", "opened", "alice", "https://example.com");
    assert_eq!(old, expected, "format!+push_str must produce the expected header");
    assert_eq!(old, new, "Both approaches must produce identical strings");
}
|
||||
|
||||
/// Times `Connection::prepare` against `Connection::prepare_cached` for the
/// content-hash lookup and checks that both return the seeded hashes.
#[test]
fn bench_prepare_vs_prepare_cached() {
    let conn = setup_db();

    // Seed documents for source ids 1..=100; each gets content_hash "hash_<source_id>".
    for i in 1..=100 {
        conn.execute(
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
             VALUES (?1, ?2, 1, ?1, 'Test', 'opened', 1000, 2000, 3000)",
            rusqlite::params![i, i * 10],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash)
             VALUES ('issue', ?1, 1, 'content', ?2, 'lh', 'ph')",
            rusqlite::params![i, format!("hash_{}", i)],
        )
        .unwrap();
    }

    // Typed as `i64` so the derived source id binds without a cast.
    let iterations: i64 = 10_000;
    let sql = "SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2";

    // Benchmark prepare (uncached)
    let start = Instant::now();
    for i in 0..iterations {
        let source_id = (i % 100) + 1;
        let mut stmt = conn.prepare(sql).unwrap();
        // Every id in 1..=100 was seeded, so a failed lookup is a bug: unwrap
        // instead of discarding the error with `.ok()`.
        let hash: String = stmt
            .query_row(rusqlite::params!["issue", source_id], |row| row.get(0))
            .unwrap();
        std::hint::black_box(hash);
    }
    let uncached_elapsed = start.elapsed();

    // Benchmark prepare_cached
    let start = Instant::now();
    for i in 0..iterations {
        let source_id = (i % 100) + 1;
        let mut stmt = conn.prepare_cached(sql).unwrap();
        let hash: String = stmt
            .query_row(rusqlite::params!["issue", source_id], |row| row.get(0))
            .unwrap();
        std::hint::black_box(hash);
    }
    let cached_elapsed = start.elapsed();

    let speedup = uncached_elapsed.as_secs_f64() / cached_elapsed.as_secs_f64();

    println!("\n=== prepare vs prepare_cached Benchmark ({} iterations) ===", iterations);
    println!("prepare(): {:?}", uncached_elapsed);
    println!("prepare_cached(): {:?}", cached_elapsed);
    println!("Speedup: {:.2}x", speedup);
    println!();

    // Verify correctness outside the timed loops: both paths must return the seeded hash.
    for source_id in [1_i64, 50, 100] {
        let expected = format!("hash_{}", source_id);

        let mut uncached_stmt = conn.prepare(sql).unwrap();
        let uncached: String = uncached_stmt
            .query_row(rusqlite::params!["issue", source_id], |row| row.get(0))
            .unwrap();
        assert_eq!(uncached, expected, "prepare() must return the seeded hash");

        let mut cached_stmt = conn.prepare_cached(sql).unwrap();
        let cached: String = cached_stmt
            .query_row(rusqlite::params!["issue", source_id], |row| row.get(0))
            .unwrap();
        assert_eq!(cached, expected, "prepare_cached() must return the seeded hash");
    }
}
|
||||
Reference in New Issue
Block a user