Files
gitlore/src/core/config.rs
teernisse 94c8613420 feat(bd-226s): implement time-decay expert scoring model
Replace flat-weight expertise scoring with exponential half-life decay,
split reviewer signals (participated vs assigned-only), dual-path rename
awareness, and new CLI flags (--as-of, --explain-score, --include-bots,
--all-history).

Changes:
- ScoringConfig: 8 new fields with validation (config.rs)
- half_life_decay() and normalize_query_path() pure functions (who.rs)
- CTE-based SQL with dual-path matching, mr_activity, reviewer_participation (who.rs)
- Rust-side decay aggregation with deterministic f64 ordering (who.rs)
- Path resolution probes check old_path columns (who.rs)
- Migration 026: 5 new indexes for dual-path and reviewer participation
- Default --since changed from 6m to 24m
- 31 new tests (example-based + invariant), 621 total who tests passing
- Autocorrect registry updated with new flags

Closes: bd-226s, bd-2w1p, bd-1soz, bd-18dn, bd-2ao4, bd-2yu5, bd-1b50,
bd-1hoq, bd-1h3f, bd-13q8, bd-11mg, bd-1vti, bd-1j5o
2026-02-12 15:44:55 -05:00

790 lines
23 KiB
Rust

use serde::Deserialize;
use std::fs;
use std::path::Path;
use super::error::{LoreError, Result};
use super::paths::get_config_path;
/// Settings read from the `gitlab` object of the config file.
///
/// JSON keys are the camelCase forms of the field names (`baseUrl`,
/// `tokenEnvVar`).
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GitLabConfig {
    /// Required. `Config::load_from_path` rejects values that do not parse
    /// as a URL.
    pub base_url: String,
    /// Optional; an absent key falls back to `default_token_env_var()`.
    /// Presumably the name of the environment variable holding the API
    /// token -- how it is consumed is not visible in this file.
    #[serde(default = "default_token_env_var")]
    pub token_env_var: String,
}
/// Serde fallback for `GitLabConfig::token_env_var` when the `tokenEnvVar`
/// key is missing from the config file.
fn default_token_env_var() -> String {
    String::from("GITLAB_TOKEN")
}
/// One entry of the `projects` array in the config file.
///
/// Deserialization is derived here; serialization is provided by a
/// hand-written `serde::Serialize` impl elsewhere in this file.
#[derive(Debug, Clone, Deserialize)]
pub struct ProjectConfig {
/// Project path (the tests use values such as `group/project`).
/// `Config::load_from_path` rejects an empty string.
pub path: String,
}
/// Settings read from the optional `sync` object of the config file.
///
/// JSON keys are the camelCase forms of the field names (for example
/// `backfillDays`, `fetchMrFileChanges`). Every key is optional: anything
/// missing is taken from `SyncConfig::default()`. Field names carry their
/// units (days, minutes, seconds); how each value is consumed is not
/// visible in this file.
#[derive(Debug, Clone, Deserialize)]
#[serde(default, rename_all = "camelCase")]
pub struct SyncConfig {
    pub backfill_days: u32,
    pub stale_lock_minutes: u32,
    pub heartbeat_interval_seconds: u32,
    pub cursor_rewind_seconds: u32,
    pub primary_concurrency: u32,
    pub dependent_concurrency: u32,
    pub requests_per_second: f64,
    /// Absent key means `true` (see `default_true`).
    #[serde(default = "default_true")]
    pub fetch_resource_events: bool,
    /// Absent key means `true` (see `default_true`).
    #[serde(default = "default_true")]
    pub fetch_mr_file_changes: bool,
    /// Absent key means `true` (see `default_true`).
    #[serde(default = "default_true")]
    pub fetch_work_item_status: bool,
}
/// Serde default helper that yields `true`; referenced by the `fetch*`
/// flags of `SyncConfig` so that an absent key deserializes as `true`.
fn default_true() -> bool {
true
}
impl Default for SyncConfig {
fn default() -> Self {
Self {
backfill_days: 14,
stale_lock_minutes: 10,
heartbeat_interval_seconds: 30,
cursor_rewind_seconds: 2,
primary_concurrency: 4,
dependent_concurrency: 8,
requests_per_second: 30.0,
fetch_resource_events: true,
fetch_mr_file_changes: true,
fetch_work_item_status: true,
}
}
}
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(default)]
pub struct StorageConfig {
#[serde(rename = "dbPath")]
pub db_path: Option<String>,
#[serde(rename = "backupDir")]
pub backup_dir: Option<String>,
#[serde(
rename = "compressRawPayloads",
default = "default_compress_raw_payloads"
)]
pub compress_raw_payloads: bool,
}
/// Serde default for `StorageConfig::compress_raw_payloads`: a `storage`
/// object without a `compressRawPayloads` key deserializes the flag as `true`.
fn default_compress_raw_payloads() -> bool {
true
}
/// Settings read from the optional `embedding` object of the config file.
///
/// JSON keys are the camelCase forms of the field names (`provider`,
/// `model`, `baseUrl`, `concurrency`). Missing keys are filled from
/// `EmbeddingConfig::default()`.
#[derive(Debug, Clone, Deserialize)]
#[serde(default, rename_all = "camelCase")]
pub struct EmbeddingConfig {
    pub provider: String,
    pub model: String,
    pub base_url: String,
    pub concurrency: u32,
}
impl Default for EmbeddingConfig {
fn default() -> Self {
Self {
provider: "ollama".to_string(),
model: "nomic-embed-text".to_string(),
base_url: "http://localhost:11434".to_string(),
concurrency: 4,
}
}
}
/// Settings read from the optional `logging` object of the config file.
///
/// JSON keys are the camelCase forms of the field names (`logDir`,
/// `retentionDays`, `fileLogging`); all of them are optional.
#[derive(Debug, Clone, Deserialize)]
#[serde(default, rename_all = "camelCase")]
pub struct LoggingConfig {
    /// `None` when the key is absent.
    pub log_dir: Option<String>,
    /// Absent key falls back to `default_retention_days()`.
    #[serde(default = "default_retention_days")]
    pub retention_days: u32,
    /// Absent key falls back to `default_file_logging()`.
    #[serde(default = "default_file_logging")]
    pub file_logging: bool,
}
/// Serde fallback for `LoggingConfig::retention_days`; also used by
/// `LoggingConfig::default()`.
fn default_retention_days() -> u32 {
    const DEFAULT_RETENTION_DAYS: u32 = 30;
    DEFAULT_RETENTION_DAYS
}
/// Serde default for `LoggingConfig::file_logging` (`true`); also used by
/// `LoggingConfig::default()`.
fn default_file_logging() -> bool {
true
}
impl Default for LoggingConfig {
fn default() -> Self {
Self {
log_dir: None,
retention_days: default_retention_days(),
file_logging: default_file_logging(),
}
}
}
/// Settings read from the optional `scoring` object of the config file.
///
/// JSON keys are the camelCase forms of the field names (for example
/// `authorWeight`, `reviewerAssignmentHalfLifeDays`). Missing keys are
/// filled from `ScoringConfig::default()`; the allowed ranges are enforced
/// by `validate_scoring`.
#[derive(Debug, Clone, Deserialize)]
#[serde(default, rename_all = "camelCase")]
pub struct ScoringConfig {
    /// Score awarded per MR in which the user authored code touching the path.
    pub author_weight: i64,
    /// Score awarded per MR in which the user reviewed code touching the path.
    pub reviewer_weight: i64,
    /// Extra score for each individual inline review comment (DiffNote).
    pub note_bonus: i64,
    /// Score awarded per MR on which the user was assigned as a reviewer.
    pub reviewer_assignment_weight: i64,
    /// Decay half-life, in days, for author contributions.
    pub author_half_life_days: u32,
    /// Decay half-life, in days, for reviewer contributions.
    pub reviewer_half_life_days: u32,
    /// Decay half-life, in days, for reviewer assignments.
    pub reviewer_assignment_half_life_days: u32,
    /// Decay half-life, in days, for note/comment contributions.
    pub note_half_life_days: u32,
    /// Factor applied to scores coming from closed (not merged) MRs.
    pub closed_mr_multiplier: f64,
    /// Shortest review note, in characters, that still earns `note_bonus`.
    pub reviewer_min_note_chars: u32,
    /// Usernames left out of expert/scoring results.
    pub excluded_usernames: Vec<String>,
}
impl Default for ScoringConfig {
fn default() -> Self {
Self {
author_weight: 25,
reviewer_weight: 10,
note_bonus: 1,
reviewer_assignment_weight: 3,
author_half_life_days: 180,
reviewer_half_life_days: 90,
reviewer_assignment_half_life_days: 45,
note_half_life_days: 45,
closed_mr_multiplier: 0.5,
reviewer_min_note_chars: 20,
excluded_usernames: vec![],
}
}
}
/// Root object of the JSON configuration file.
///
/// `gitlab` and `projects` are required keys. `defaultProject` is optional,
/// and each remaining section falls back to its type's `Default` when the
/// key is absent. JSON keys are the camelCase forms of the field names.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Config {
    pub gitlab: GitLabConfig,
    /// `Config::load_from_path` requires at least one entry.
    pub projects: Vec<ProjectConfig>,
    /// When set, `Config::load_from_path` requires it to match one of
    /// `projects`.
    pub default_project: Option<String>,
    #[serde(default)]
    pub sync: SyncConfig,
    #[serde(default)]
    pub storage: StorageConfig,
    #[serde(default)]
    pub embedding: EmbeddingConfig,
    #[serde(default)]
    pub logging: LoggingConfig,
    #[serde(default)]
    pub scoring: ScoringConfig,
}
impl Config {
    /// Resolves the config file location and loads it.
    ///
    /// The location is whatever `get_config_path(cli_override)` returns.
    ///
    /// # Errors
    /// Returns `LoreError::ConfigNotFound` when nothing exists at that path,
    /// otherwise whatever [`Config::load_from_path`] returns.
    pub fn load(cli_override: Option<&str>) -> Result<Self> {
        let config_path = get_config_path(cli_override);
        if !config_path.exists() {
            return Err(LoreError::ConfigNotFound {
                path: config_path.display().to_string(),
            });
        }
        Self::load_from_path(&config_path)
    }

    /// Reads, parses and validates the JSON config file at `path`.
    ///
    /// # Errors
    /// Returns `LoreError::ConfigInvalid` when, checked in this order:
    /// the file cannot be read, the JSON does not deserialize, `projects` is
    /// empty, a project path is empty, `gitlab.baseUrl` does not parse as a
    /// URL, `defaultProject` matches no configured project, or the `scoring`
    /// section fails `validate_scoring`.
    pub fn load_from_path(path: &Path) -> Result<Self> {
        let content = fs::read_to_string(path).map_err(|e| LoreError::ConfigInvalid {
            details: format!("Failed to read config file: {e}"),
        })?;
        let config: Config =
            serde_json::from_str(&content).map_err(|e| LoreError::ConfigInvalid {
                details: format!("Invalid JSON: {e}"),
            })?;

        if config.projects.is_empty() {
            return Err(LoreError::ConfigInvalid {
                details: "At least one project is required".to_string(),
            });
        }
        for project in &config.projects {
            if project.path.is_empty() {
                return Err(LoreError::ConfigInvalid {
                    details: "Project path cannot be empty".to_string(),
                });
            }
        }

        if url::Url::parse(&config.gitlab.base_url).is_err() {
            return Err(LoreError::ConfigInvalid {
                details: format!("Invalid GitLab URL: {}", config.gitlab.base_url),
            });
        }

        if let Some(dp) = config.default_project.as_deref() {
            // The default may be a full path or just a trailing
            // "/<name>" segment, compared ASCII-case-insensitively. The
            // suffix depends only on `dp`, so build it once instead of once
            // per project.
            let suffix = format!("/{}", dp.to_ascii_lowercase());
            let matched = config.projects.iter().any(|p| {
                p.path.eq_ignore_ascii_case(dp) || p.path.to_ascii_lowercase().ends_with(&suffix)
            });
            if !matched {
                return Err(LoreError::ConfigInvalid {
                    details: format!(
                        "defaultProject '{}' does not match any configured project path",
                        dp
                    ),
                });
            }
        }

        validate_scoring(&config.scoring)?;
        Ok(config)
    }

    /// Return the effective project filter: CLI flag wins, then config default.
    pub fn effective_project<'a>(&'a self, cli_project: Option<&'a str>) -> Option<&'a str> {
        cli_project.or(self.default_project.as_deref())
    }
}
/// Checks the `scoring` section against its allowed ranges.
///
/// Rules are evaluated in a fixed order and the first violation wins:
/// 1. the four weights must be `>= 0`;
/// 2. the four half-lives must lie in `1..=3650` days;
/// 3. `closedMrMultiplier` must be finite and in `(0.0, 1.0]`;
/// 4. `reviewerMinNoteChars` must be `<= 4096`;
/// 5. no `excludedUsernames` entry may be empty or whitespace-only.
///
/// # Errors
/// Returns `LoreError::ConfigInvalid` naming the offending key.
fn validate_scoring(scoring: &ScoringConfig) -> Result<()> {
    // Every failure is reported through the same error variant.
    let invalid = |details: &str| -> Result<()> {
        Err(LoreError::ConfigInvalid {
            details: details.to_string(),
        })
    };

    let weights = [
        (scoring.author_weight, "scoring.authorWeight must be >= 0"),
        (scoring.reviewer_weight, "scoring.reviewerWeight must be >= 0"),
        (scoring.note_bonus, "scoring.noteBonus must be >= 0"),
        (
            scoring.reviewer_assignment_weight,
            "scoring.reviewerAssignmentWeight must be >= 0",
        ),
    ];
    for (weight, message) in weights {
        if weight < 0 {
            return invalid(message);
        }
    }

    let half_lives = [
        (
            scoring.author_half_life_days,
            "scoring.authorHalfLifeDays must be in 1..=3650",
        ),
        (
            scoring.reviewer_half_life_days,
            "scoring.reviewerHalfLifeDays must be in 1..=3650",
        ),
        (
            scoring.reviewer_assignment_half_life_days,
            "scoring.reviewerAssignmentHalfLifeDays must be in 1..=3650",
        ),
        (
            scoring.note_half_life_days,
            "scoring.noteHalfLifeDays must be in 1..=3650",
        ),
    ];
    for (days, message) in half_lives {
        if !(1..=3650).contains(&days) {
            return invalid(message);
        }
    }

    // NaN fails `is_finite`, so it is rejected together with the infinities.
    let multiplier = scoring.closed_mr_multiplier;
    let multiplier_ok = multiplier.is_finite() && multiplier > 0.0 && multiplier <= 1.0;
    if !multiplier_ok {
        return invalid("scoring.closedMrMultiplier must be finite and in (0.0, 1.0]");
    }

    if scoring.reviewer_min_note_chars > 4096 {
        return invalid("scoring.reviewerMinNoteChars must be <= 4096");
    }

    for username in &scoring.excluded_usernames {
        if username.trim().is_empty() {
            return invalid("scoring.excludedUsernames entries must be non-empty");
        }
    }

    Ok(())
}
/// Serialize-only subset of the configuration: `gitlab`, `projects` and an
/// optional `defaultProject`.
///
/// Keys are written in camelCase; `defaultProject` is left out of the
/// output entirely when it is `None`.
#[derive(Debug, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct MinimalConfig {
    pub gitlab: MinimalGitLabConfig,
    pub projects: Vec<ProjectConfig>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub default_project: Option<String>,
}
/// Serialize-only `gitlab` section used by `MinimalConfig`.
///
/// Both fields are always written, under the camelCase keys `baseUrl` and
/// `tokenEnvVar`.
#[derive(Debug, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct MinimalGitLabConfig {
    pub base_url: String,
    pub token_env_var: String,
}
impl serde::Serialize for ProjectConfig {
    /// Serializes the project as a struct named `ProjectConfig` with a
    /// single `path` field.
    fn serialize<S: serde::Serializer>(
        &self,
        serializer: S,
    ) -> std::result::Result<S::Ok, S::Error> {
        use serde::ser::SerializeStruct;

        // Number of fields announced to the serializer up front.
        const FIELD_COUNT: usize = 1;

        let mut out = serializer.serialize_struct("ProjectConfig", FIELD_COUNT)?;
        out.serialize_field("path", &self.path)?;
        out.end()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // ---- fixtures ---------------------------------------------------------

    /// Writes `config.json` into `dir` with a fixed `gitlab`/`projects`
    /// section and `scoring_json` spliced in verbatim as the `scoring` value.
    fn write_config(dir: &TempDir, scoring_json: &str) -> std::path::PathBuf {
        let contents = format!(
            r#"{{
"gitlab": {{
"baseUrl": "https://gitlab.example.com",
"tokenEnvVar": "GITLAB_TOKEN"
}},
"projects": [
{{ "path": "group/project" }}
],
"scoring": {scoring_json}
}}"#
        );
        let target = dir.path().join("config.json");
        fs::write(&target, contents).unwrap();
        target
    }

    /// Writes `config.json` into `dir` with two projects and, when given, a
    /// `defaultProject` key.
    fn write_config_with_default_project(
        dir: &TempDir,
        default_project: Option<&str>,
    ) -> std::path::PathBuf {
        let dp_field = default_project
            .map(|dp| format!(r#","defaultProject": "{dp}""#))
            .unwrap_or_default();
        let contents = format!(
            r#"{{
"gitlab": {{
"baseUrl": "https://gitlab.example.com",
"tokenEnvVar": "GITLAB_TOKEN"
}},
"projects": [
{{ "path": "group/project" }},
{{ "path": "other/repo" }}
]{dp_field}
}}"#
        );
        let target = dir.path().join("config.json");
        fs::write(&target, contents).unwrap();
        target
    }

    /// Loads a config whose `scoring` section is `scoring_json` and returns
    /// the rendered error; panics if loading succeeds.
    fn scoring_load_error(scoring_json: &str) -> String {
        let dir = TempDir::new().unwrap();
        let path = write_config(&dir, scoring_json);
        Config::load_from_path(&path).unwrap_err().to_string()
    }

    /// Loads the two-project fixture with the given `defaultProject`.
    fn load_with_default_project(default_project: Option<&str>) -> Result<Config> {
        let dir = TempDir::new().unwrap();
        let path = write_config_with_default_project(&dir, default_project);
        Config::load_from_path(&path)
    }

    /// Runs `validate_scoring` and returns the rendered error; panics if the
    /// settings are accepted.
    fn scoring_validation_error(scoring: &ScoringConfig) -> String {
        validate_scoring(scoring).unwrap_err().to_string()
    }

    /// In-memory config with a single `group/project` entry and the given
    /// default project; every other section uses its defaults.
    fn config_with_default_project(default_project: Option<&str>) -> Config {
        Config {
            gitlab: GitLabConfig {
                base_url: "https://gitlab.example.com".to_string(),
                token_env_var: "GITLAB_TOKEN".to_string(),
            },
            projects: vec![ProjectConfig {
                path: "group/project".to_string(),
            }],
            default_project: default_project.map(String::from),
            sync: SyncConfig::default(),
            storage: StorageConfig::default(),
            embedding: EmbeddingConfig::default(),
            logging: LoggingConfig::default(),
            scoring: ScoringConfig::default(),
        }
    }

    /// Serializes a `MinimalConfig` with the given default project to JSON.
    fn minimal_config_json(default_project: Option<&str>) -> String {
        let config = MinimalConfig {
            gitlab: MinimalGitLabConfig {
                base_url: "https://gitlab.example.com".to_string(),
                token_env_var: "GITLAB_TOKEN".to_string(),
            },
            projects: vec![ProjectConfig {
                path: "group/project".to_string(),
            }],
            default_project: default_project.map(String::from),
        };
        serde_json::to_string(&config).unwrap()
    }

    // ---- scoring weights rejected at load time ----------------------------

    #[test]
    fn test_load_rejects_negative_author_weight() {
        let msg = scoring_load_error(
            r#"{
"authorWeight": -1,
"reviewerWeight": 10,
"noteBonus": 1
}"#,
        );
        assert!(
            msg.contains("scoring.authorWeight"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_load_rejects_negative_reviewer_weight() {
        let msg = scoring_load_error(
            r#"{
"authorWeight": 25,
"reviewerWeight": -1,
"noteBonus": 1
}"#,
        );
        assert!(
            msg.contains("scoring.reviewerWeight"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_load_rejects_negative_note_bonus() {
        let msg = scoring_load_error(
            r#"{
"authorWeight": 25,
"reviewerWeight": 10,
"noteBonus": -1
}"#,
        );
        assert!(msg.contains("scoring.noteBonus"), "unexpected error: {msg}");
    }

    // ---- sync defaults ----------------------------------------------------

    #[test]
    fn test_config_fetch_work_item_status_default_true() {
        assert!(SyncConfig::default().fetch_work_item_status);
    }

    #[test]
    fn test_config_deserialize_without_key() {
        let parsed: SyncConfig = serde_json::from_str(r#"{}"#).unwrap();
        assert!(
            parsed.fetch_work_item_status,
            "Missing key should default to true"
        );
    }

    // ---- effective_project ------------------------------------------------

    #[test]
    fn test_effective_project_cli_overrides_default() {
        let config = config_with_default_project(Some("group/project"));
        assert_eq!(
            config.effective_project(Some("other/repo")),
            Some("other/repo")
        );
    }

    #[test]
    fn test_effective_project_falls_back_to_default() {
        let config = config_with_default_project(Some("group/project"));
        assert_eq!(config.effective_project(None), Some("group/project"));
    }

    #[test]
    fn test_effective_project_none_when_both_absent() {
        let config = config_with_default_project(None);
        assert_eq!(config.effective_project(None), None);
    }

    // ---- defaultProject validation at load time ---------------------------

    #[test]
    fn test_load_with_valid_default_project() {
        let config = load_with_default_project(Some("group/project")).unwrap();
        assert_eq!(config.default_project.as_deref(), Some("group/project"));
    }

    #[test]
    fn test_load_rejects_invalid_default_project() {
        let msg = load_with_default_project(Some("nonexistent/project"))
            .unwrap_err()
            .to_string();
        assert!(msg.contains("defaultProject"), "unexpected error: {msg}");
    }

    #[test]
    fn test_load_default_project_suffix_match() {
        let config = load_with_default_project(Some("project")).unwrap();
        assert_eq!(config.default_project.as_deref(), Some("project"));
    }

    // ---- MinimalConfig serialization --------------------------------------

    #[test]
    fn test_minimal_config_omits_null_default_project() {
        let json = minimal_config_json(None);
        assert!(
            !json.contains("defaultProject"),
            "null default_project should be omitted: {json}"
        );
    }

    #[test]
    fn test_minimal_config_includes_default_project_when_set() {
        let json = minimal_config_json(Some("group/project"));
        assert!(
            json.contains("defaultProject"),
            "set default_project should be present: {json}"
        );
    }

    // ---- validate_scoring: rejections -------------------------------------

    #[test]
    fn test_config_validation_rejects_zero_half_life() {
        let msg = scoring_validation_error(&ScoringConfig {
            author_half_life_days: 0,
            ..Default::default()
        });
        assert!(
            msg.contains("authorHalfLifeDays"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_config_validation_rejects_absurd_half_life() {
        let msg = scoring_validation_error(&ScoringConfig {
            author_half_life_days: 5000,
            ..Default::default()
        });
        assert!(
            msg.contains("authorHalfLifeDays"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_config_validation_rejects_nan_multiplier() {
        let msg = scoring_validation_error(&ScoringConfig {
            closed_mr_multiplier: f64::NAN,
            ..Default::default()
        });
        assert!(
            msg.contains("closedMrMultiplier"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_config_validation_rejects_zero_multiplier() {
        let msg = scoring_validation_error(&ScoringConfig {
            closed_mr_multiplier: 0.0,
            ..Default::default()
        });
        assert!(
            msg.contains("closedMrMultiplier"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_config_validation_rejects_negative_reviewer_assignment_weight() {
        let msg = scoring_validation_error(&ScoringConfig {
            reviewer_assignment_weight: -1,
            ..Default::default()
        });
        assert!(
            msg.contains("reviewerAssignmentWeight"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_config_validation_rejects_oversized_min_note_chars() {
        let msg = scoring_validation_error(&ScoringConfig {
            reviewer_min_note_chars: 5000,
            ..Default::default()
        });
        assert!(
            msg.contains("reviewerMinNoteChars"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_config_validation_rejects_empty_excluded_username() {
        let msg = scoring_validation_error(&ScoringConfig {
            excluded_usernames: vec!["valid".to_string(), " ".to_string()],
            ..Default::default()
        });
        assert!(msg.contains("excludedUsernames"), "unexpected error: {msg}");
    }

    // ---- validate_scoring: accepted values --------------------------------

    #[test]
    fn test_config_validation_accepts_valid_new_fields() {
        let scoring = ScoringConfig {
            author_half_life_days: 365,
            reviewer_half_life_days: 180,
            reviewer_assignment_half_life_days: 90,
            note_half_life_days: 60,
            closed_mr_multiplier: 0.5,
            reviewer_min_note_chars: 20,
            reviewer_assignment_weight: 3,
            excluded_usernames: vec!["bot-user".to_string()],
            ..Default::default()
        };
        validate_scoring(&scoring).unwrap();
    }

    #[test]
    fn test_config_validation_accepts_boundary_half_life() {
        // Both ends of the allowed range, lower bound first.
        for days in [1, 3650] {
            let scoring = ScoringConfig {
                author_half_life_days: days,
                ..Default::default()
            };
            validate_scoring(&scoring).unwrap();
        }
    }

    #[test]
    fn test_config_validation_accepts_multiplier_at_one() {
        let scoring = ScoringConfig {
            closed_mr_multiplier: 1.0,
            ..Default::default()
        };
        validate_scoring(&scoring).unwrap();
    }
}