perf: Eliminate double serialization, add SQLite tuning, optimize hot paths
11 isomorphic performance fixes from deep audit (no behavior changes):

- Eliminate double serialization: store_payload now accepts pre-serialized bytes (&[u8]) instead of re-serializing from serde_json::Value. Uses Cow<[u8]> for zero-copy when compression is disabled.
- Add SQLite cache_size (64MB) and mmap_size (256MB) pragmas
- Replace SELECT-then-INSERT label upserts with INSERT...ON CONFLICT RETURNING in both issues.rs and merge_requests.rs
- Replace INSERT + SELECT milestone upsert with RETURNING
- Use prepare_cached for 5 hot-path queries in extractor.rs
- Optimize compute_list_hash: index-sort + incremental SHA-256 instead of clone+sort+join+hash
- Pre-allocate embedding float-to-bytes buffer with Vec::with_capacity
- Replace RandomState::new() in rand_jitter with atomic counter XOR nanos
- Remove redundant per-note payload storage (discussion payload contains all notes already)
- Change transform_issue to accept &GitLabIssue (avoids full struct clone)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -329,7 +329,10 @@ fn store_embedding(
|
||||
) -> Result<()> {
|
||||
let rowid = encode_rowid(doc_id, chunk_index as i64);
|
||||
|
||||
let embedding_bytes: Vec<u8> = embedding.iter().flat_map(|f| f.to_le_bytes()).collect();
|
||||
let mut embedding_bytes = Vec::with_capacity(embedding.len() * 4);
|
||||
for f in embedding {
|
||||
embedding_bytes.extend_from_slice(&f.to_le_bytes());
|
||||
}
|
||||
|
||||
conn.execute(
|
||||
"INSERT OR REPLACE INTO embeddings (rowid, embedding) VALUES (?1, ?2)",
|
||||
|
||||
Reference in New Issue
Block a user