Wave 7: Phase 2 features - sync --all, external refs, cross-alias discovery, CI/CD, reliability tests (bd-1ky, bd-1bp, bd-1rk, bd-1lj, bd-gvr, bd-1x5)

- Sync --all with async concurrency, per-host throttling, failure budgets, resumable execution
- External ref bundling at fetch time with origin tracking
- Cross-alias discovery (--all-aliases) for list and search commands
- CI/CD pipeline (.gitlab-ci.yml), cargo-deny config, Dockerfile, install script
- Reliability test suite: crash consistency (8 tests), lock contention (3 tests), property-based (4 tests)
- Criterion performance benchmarks (5 benchmarks)
- Bug fix: doctor --fix now repairs missing index.json when raw.json exists
- Bug fix: shared $ref references no longer incorrectly flagged as circular (refs.rs)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-02-12 15:29:31 -05:00
parent 398311ca4c
commit 4ac8659ebd
20 changed files with 3430 additions and 68 deletions

View File

@@ -159,8 +159,14 @@ fn check_alias(cm: &CacheManager, alias: &str, stale_threshold_days: u32) -> Ali
}
Err(e) => {
issues.push(format!("load error: {e}"));
status = HealthStatus::Unhealthy;
unfixable = true;
// If raw.json exists, the index can be rebuilt from it
if cm.alias_dir(alias).join("raw.json").exists() {
status = HealthStatus::Degraded;
fixable = true;
} else {
status = HealthStatus::Unhealthy;
unfixable = true;
}
(None, None)
}
};

View File

@@ -8,6 +8,7 @@ use tokio::io::AsyncReadExt;
use crate::core::cache::{CacheManager, compute_hash, validate_alias};
use crate::core::config::{AuthType, Config, cache_dir, config_path, resolve_credential};
use crate::core::external_refs::{ExternalRefConfig, resolve_external_refs};
use crate::core::http::AsyncHttpClient;
use crate::core::indexer::{Format, build_index, detect_format, normalize_to_json};
use crate::core::network::{NetworkPolicy, resolve_policy};
@@ -63,6 +64,22 @@ pub struct Args {
/// Allow plain HTTP (insecure)
#[arg(long)]
pub allow_insecure_http: bool,
/// Resolve external $ref entries by fetching and inlining them
#[arg(long)]
pub resolve_external_refs: bool,
/// Allowed host for external ref fetching (repeatable, required with --resolve-external-refs)
#[arg(long = "ref-allow-host")]
pub ref_allow_host: Vec<String>,
/// Maximum chain depth for transitive external refs (default: 10)
#[arg(long, default_value = "10")]
pub ref_max_depth: u32,
/// Maximum total bytes fetched for external refs (default: 10 MB)
#[arg(long, default_value = "10485760")]
pub ref_max_bytes: u64,
}
// ---------------------------------------------------------------------------
@@ -274,7 +291,36 @@ async fn fetch_inner(
};
let json_bytes = normalize_to_json(&raw_bytes, format)?;
let value: serde_json::Value = serde_json::from_slice(&json_bytes)?;
let mut value: serde_json::Value = serde_json::from_slice(&json_bytes)?;
// External ref resolution (optional)
if args.resolve_external_refs {
if args.ref_allow_host.is_empty() {
return Err(SwaggerCliError::Usage(
"--resolve-external-refs requires at least one --ref-allow-host".to_string(),
));
}
let ref_config = ExternalRefConfig {
allow_hosts: args.ref_allow_host.clone(),
max_depth: args.ref_max_depth,
max_bytes: args.ref_max_bytes,
};
let ref_client = AsyncHttpClient::builder()
.overall_timeout(Duration::from_millis(args.timeout_ms))
.max_bytes(args.max_bytes)
.max_retries(args.retries)
.allow_insecure_http(args.allow_insecure_http)
.allowed_private_hosts(args.allow_private_host.clone())
.network_policy(network_policy)
.build();
resolve_external_refs(&mut value, source_url.as_deref(), &ref_config, &ref_client).await?;
}
// Re-serialize the (possibly bundled) value to get the final json_bytes
let json_bytes = serde_json::to_vec(&value)?;
// Compute content hash for indexing
let content_hash = compute_hash(&raw_bytes);
@@ -438,6 +484,10 @@ mod tests {
retries: 2,
allow_private_host: vec![],
allow_insecure_http: false,
resolve_external_refs: false,
ref_allow_host: vec![],
ref_max_depth: 10,
ref_max_bytes: 10485760,
}
}
@@ -615,6 +665,10 @@ mod tests {
retries: 2,
allow_private_host: vec![],
allow_insecure_http: false,
resolve_external_refs: false,
ref_allow_host: vec![],
ref_max_depth: 10,
ref_max_bytes: 10485760,
}
}

View File

@@ -16,8 +16,12 @@ use crate::output::table::render_table_or_empty;
/// List endpoints from a cached spec
#[derive(Debug, ClapArgs)]
pub struct Args {
/// Alias of the cached spec
pub alias: String,
/// Alias of the cached spec (omit when using --all-aliases)
pub alias: Option<String>,
/// Query across every cached alias
#[arg(long)]
pub all_aliases: bool,
/// Filter by HTTP method (case-insensitive)
#[arg(long, short = 'm')]
@@ -57,6 +61,18 @@ struct ListOutput {
meta: ListMeta,
}
/// Robot-mode payload emitted by the all-aliases listing: endpoints merged
/// from every alias whose index could be loaded.
#[derive(Debug, Serialize)]
struct AllAliasesListOutput {
/// Matching endpoints, sorted by alias, then path, then method rank, and
/// truncated to the limit unless `--all` is set.
endpoints: Vec<AliasEndpointEntry>,
/// Aliases whose index loaded successfully (failed ones appear in `warnings`).
aliases_searched: Vec<String>,
/// Endpoint count across all loaded aliases before any filter is applied.
total: usize,
/// Endpoint count after filtering but before the limit is applied.
filtered: usize,
/// The method/tag/path filters exactly as the user supplied them.
applied_filters: BTreeMap<String, String>,
/// One message per alias that failed to load; omitted from the output when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
warnings: Vec<String>,
/// Elapsed wall-clock time for the command, in milliseconds.
duration_ms: u64,
}
#[derive(Debug, Serialize)]
struct EndpointEntry {
path: String,
@@ -67,6 +83,17 @@ struct EndpointEntry {
deprecated: bool,
}
/// A single endpoint in the all-aliases listing, tagged with the alias it
/// was read from. The remaining fields are copied from the indexed endpoint.
#[derive(Debug, Serialize)]
struct AliasEndpointEntry {
/// Alias of the cached spec this endpoint belongs to.
alias: String,
path: String,
method: String,
summary: Option<String>,
operation_id: Option<String>,
tags: Vec<String>,
deprecated: bool,
}
#[derive(Debug, Serialize)]
struct ListMeta {
alias: String,
@@ -89,11 +116,31 @@ struct EndpointRow {
summary: String,
}
/// One row of the human-readable table printed by the all-aliases listing.
/// Column headers come from the `rename` attributes below.
#[derive(Tabled)]
struct AliasEndpointRow {
#[tabled(rename = "ALIAS")]
alias: String,
#[tabled(rename = "METHOD")]
method: String,
#[tabled(rename = "PATH")]
path: String,
// Filled with an empty string when the endpoint has no summary.
#[tabled(rename = "SUMMARY")]
summary: String,
}
// ---------------------------------------------------------------------------
// Execute
// ---------------------------------------------------------------------------
pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliError> {
if args.all_aliases {
return execute_all_aliases(args, robot_mode).await;
}
let alias = args.alias.as_deref().ok_or_else(|| {
SwaggerCliError::Usage("An alias is required unless --all-aliases is specified".to_string())
})?;
let start = Instant::now();
// Compile path regex early so we fail fast on invalid patterns
@@ -108,7 +155,7 @@ pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliErro
};
let cm = CacheManager::new(cache_dir());
let (index, meta) = cm.load_index(&args.alias)?;
let (index, meta) = cm.load_index(alias)?;
let total = index.endpoints.len();
@@ -211,7 +258,7 @@ pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliErro
filtered: filtered_count,
applied_filters,
meta: ListMeta {
alias: args.alias.clone(),
alias: alias.to_string(),
spec_version: meta.spec_version.clone(),
cached_at: meta.fetched_at.to_rfc3339(),
duration_ms: duration.as_millis().min(u64::MAX as u128) as u64,
@@ -253,6 +300,173 @@ pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliErro
Ok(())
}
// ---------------------------------------------------------------------------
// All-aliases execution
// ---------------------------------------------------------------------------
/// List endpoints across every cached alias (`--all-aliases`).
///
/// Loads the index of each alias in alphabetical order, applies the optional
/// method / tag / path filters, merges the survivors into one list sorted by
/// alias, path and method rank, and truncates it to `args.limit` unless
/// `args.all` is set. Aliases whose index cannot be loaded are reported as
/// warnings instead of aborting the command.
///
/// In robot mode the result is emitted via `robot::robot_success`; otherwise a
/// table goes to stdout and warnings go to stderr.
///
/// # Errors
/// - `SwaggerCliError::Usage` if the path regex is invalid or no aliases are cached.
/// - `SwaggerCliError::Cache` if every alias failed to load.
/// - Whatever `CacheManager::list_aliases` returns on failure.
async fn execute_all_aliases(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliError> {
    let start = Instant::now();

    // Compile the path regex first so an invalid pattern fails before any I/O.
    let path_regex = match &args.path {
        Some(pattern) => {
            let re = Regex::new(pattern).map_err(|e| {
                SwaggerCliError::Usage(format!("Invalid path regex '{pattern}': {e}"))
            })?;
            Some(re)
        }
        None => None,
    };

    let cm = CacheManager::new(cache_dir());
    let alias_metas = cm.list_aliases()?;
    if alias_metas.is_empty() {
        return Err(SwaggerCliError::Usage(
            "No cached aliases found. Fetch a spec first with 'swagger-cli fetch'.".to_string(),
        ));
    }

    // Normalise the filters once rather than per endpoint.
    let method_upper = args.method.as_ref().map(|m| m.to_uppercase());
    let tag_lower = args.tag.as_ref().map(|t| t.to_lowercase());

    let mut all_entries: Vec<AliasEndpointEntry> = Vec::new();
    let mut aliases_searched: Vec<String> = Vec::new();
    let mut warnings: Vec<String> = Vec::new();
    let mut total_endpoints: usize = 0;

    let mut sorted_aliases: Vec<_> = alias_metas.iter().map(|m| m.alias.as_str()).collect();
    sorted_aliases.sort();

    for alias_name in &sorted_aliases {
        match cm.load_index(alias_name) {
            Ok((index, _meta)) => {
                aliases_searched.push((*alias_name).to_string());
                total_endpoints += index.endpoints.len();
                for ep in &index.endpoints {
                    if let Some(ref m) = method_upper
                        && ep.method.to_uppercase() != *m
                    {
                        continue;
                    }
                    if let Some(ref t) = tag_lower
                        && !ep
                            .tags
                            .iter()
                            .any(|tag| tag.to_lowercase().contains(t.as_str()))
                    {
                        continue;
                    }
                    if let Some(ref re) = path_regex
                        && !re.is_match(&ep.path)
                    {
                        continue;
                    }
                    all_entries.push(AliasEndpointEntry {
                        alias: (*alias_name).to_string(),
                        path: ep.path.clone(),
                        method: ep.method.clone(),
                        summary: ep.summary.clone(),
                        operation_id: ep.operation_id.clone(),
                        tags: ep.tags.clone(),
                        deprecated: ep.deprecated,
                    });
                }
            }
            Err(e) => {
                // A single broken alias must not hide results from the others.
                warnings.push(format!("Failed to load alias '{alias_name}': {e}"));
            }
        }
    }

    if aliases_searched.is_empty() {
        return Err(SwaggerCliError::Cache(
            "All aliases failed to load".to_string(),
        ));
    }

    // Captured before truncation so the output can report how many matched.
    let filtered_count = all_entries.len();

    // Sort: alias ASC, path ASC, method_rank ASC
    all_entries.sort_by(|a, b| {
        a.alias
            .cmp(&b.alias)
            .then_with(|| a.path.cmp(&b.path))
            .then_with(|| method_rank(&a.method).cmp(&method_rank(&b.method)))
    });

    // ---- Limit ----
    if !args.all {
        all_entries.truncate(args.limit);
    }

    let duration = start.elapsed();

    if robot_mode {
        let mut applied_filters = BTreeMap::new();
        if let Some(ref m) = args.method {
            applied_filters.insert("method".into(), m.clone());
        }
        if let Some(ref t) = args.tag {
            applied_filters.insert("tag".into(), t.clone());
        }
        if let Some(ref p) = args.path {
            applied_filters.insert("path".into(), p.clone());
        }
        let output = AllAliasesListOutput {
            endpoints: all_entries,
            aliases_searched,
            total: total_endpoints,
            filtered: filtered_count,
            applied_filters,
            warnings,
            duration_ms: duration.as_millis().min(u64::MAX as u128) as u64,
        };
        robot::robot_success(output, "list", duration);
    } else {
        println!("All aliases ({} searched)\n", aliases_searched.len());
        let rows: Vec<AliasEndpointRow> = all_entries
            .iter()
            .map(|ep| AliasEndpointRow {
                alias: ep.alias.clone(),
                method: ep.method.clone(),
                path: ep.path.clone(),
                summary: ep.summary.clone().unwrap_or_default(),
            })
            .collect();
        let table = render_table_or_empty(&rows, "No endpoints match the given filters.");
        println!("{table}");
        if !rows.is_empty() {
            println!();
            if filtered_count > rows.len() {
                println!(
                    "Showing {} of {} (filtered from {}). Use --all to show everything.",
                    rows.len(),
                    filtered_count,
                    total_endpoints
                );
            } else {
                println!("Showing {} of {}", rows.len(), total_endpoints);
            }
        }
        if !warnings.is_empty() {
            // The separator belongs to the warning block, so it goes to stderr
            // too; stdout then carries only the listing when piped.
            eprintln!();
            for w in &warnings {
                eprintln!("Warning: {w}");
            }
        }
    }

    Ok(())
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

View File

@@ -5,6 +5,7 @@ use serde::Serialize;
use crate::core::cache::CacheManager;
use crate::core::config::cache_dir;
use crate::core::indexer::method_rank;
use crate::core::search::{SearchEngine, SearchOptions, SearchResult, SearchResultType};
use crate::errors::SwaggerCliError;
use crate::output::robot;
@@ -12,11 +13,17 @@ use crate::output::robot;
/// Search endpoints and schemas by keyword
#[derive(Debug, ClapArgs)]
pub struct Args {
/// Alias of the cached spec
pub alias: String,
/// Alias of the cached spec, or search query when using --all-aliases
#[arg(required = true)]
pub alias_or_query: String,
/// Search query
pub query: String,
/// Search query (required unless --all-aliases is used, in which case the
/// first positional argument is treated as the query)
pub query: Option<String>,
/// Query across every cached alias
#[arg(long)]
pub all_aliases: bool,
/// Case-sensitive matching
#[arg(long)]
@@ -45,6 +52,15 @@ struct RobotOutput {
total: usize,
}
/// Robot-mode payload emitted by the all-aliases search.
#[derive(Debug, Serialize)]
struct AllAliasesRobotOutput {
/// Merged results from all loaded aliases, already sorted, limited and ranked.
results: Vec<AliasRobotResult>,
/// Aliases whose index loaded successfully and were therefore searched.
aliases_searched: Vec<String>,
/// Number of results returned (counted after the limit has been applied).
total: usize,
/// One message per alias that failed to load; omitted from the output when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
warnings: Vec<String>,
}
#[derive(Debug, Serialize)]
struct RobotResult {
#[serde(rename = "type")]
@@ -59,6 +75,21 @@ struct RobotResult {
matches: Vec<RobotMatch>,
}
/// A single search hit in all-aliases mode, tagged with the alias it came from.
#[derive(Debug, Serialize)]
struct AliasRobotResult {
/// Alias of the cached spec that produced this hit.
alias: String,
/// Serialized as `type`; either `"endpoint"` or `"schema"`.
#[serde(rename = "type")]
result_type: &'static str,
name: String,
#[serde(skip_serializing_if = "Option::is_none")]
method: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
summary: Option<String>,
/// 1-based position in the merged, sorted result list.
rank: usize,
score: u32,
matches: Vec<RobotMatch>,
}
#[derive(Debug, Serialize)]
struct RobotMatch {
field: String,
@@ -123,7 +154,31 @@ fn parse_in_fields(raw: &str) -> Result<(bool, bool, bool), SwaggerCliError> {
// Execute
// ---------------------------------------------------------------------------
/// Split the positional arguments into an `(alias, query)` pair.
///
/// With `--all-aliases` there is no alias and the first positional
/// (`alias_or_query`) is the query. Otherwise the first positional is the
/// alias and the second positional is the mandatory query.
///
/// # Errors
/// Returns `SwaggerCliError::Usage` when `--all-aliases` is not set and no
/// query was supplied.
fn resolve_alias_and_query(args: &Args) -> Result<(Option<&str>, &str), SwaggerCliError> {
    let first_positional = args.alias_or_query.as_str();

    // All-aliases mode: the lone positional is the search query itself.
    if args.all_aliases {
        return Ok((None, first_positional));
    }

    // Single-alias mode: first positional names the alias, second is the query.
    match args.query.as_deref() {
        Some(query) => Ok((Some(first_positional), query)),
        None => Err(SwaggerCliError::Usage(
            "A search query is required. Usage: search <alias> <query>".to_string(),
        )),
    }
}
pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliError> {
if args.all_aliases {
return execute_all_aliases(args, robot_mode).await;
}
let (alias, query) = resolve_alias_and_query(args)?;
let alias = alias.expect("alias is always Some in non-all-aliases mode");
let start = Instant::now();
let (search_paths, search_descriptions, search_schemas) = match &args.in_fields {
@@ -132,7 +187,7 @@ pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliErro
};
let cm = CacheManager::new(cache_dir());
let (index, _meta) = cm.load_index(&args.alias)?;
let (index, _meta) = cm.load_index(alias)?;
let opts = SearchOptions {
search_paths,
@@ -144,7 +199,7 @@ pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliErro
};
let engine = SearchEngine::new(&index);
let results = engine.search(&args.query, &opts);
let results = engine.search(query, &opts);
if robot_mode {
let output = RobotOutput {
@@ -153,13 +208,12 @@ pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliErro
};
robot::robot_success(output, "search", start.elapsed());
} else if results.is_empty() {
println!("No results found for '{}'", args.query);
println!("No results found for '{query}'");
} else {
println!(
"Found {} result{} for '{}':\n",
"Found {} result{} for '{query}':\n",
results.len(),
if results.len() == 1 { "" } else { "s" },
args.query,
);
for r in &results {
let type_label = match r.result_type {
@@ -188,3 +242,168 @@ pub async fn execute(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliErro
Ok(())
}
// ---------------------------------------------------------------------------
// All-aliases execution
// ---------------------------------------------------------------------------
/// Run a search across every cached alias (`--all-aliases`).
///
/// Each alias index is loaded in alphabetical order and searched without a
/// per-alias limit. The hits are then merged, sorted (score DESC, result type,
/// name, method rank, alias), truncated to `args.limit` and given 1-based
/// ranks. Aliases whose index cannot be loaded are reported as warnings
/// instead of aborting the command.
///
/// In robot mode the result is emitted via `robot::robot_success`; otherwise a
/// ranked list goes to stdout and warnings go to stderr.
///
/// # Errors
/// - `SwaggerCliError::Usage` if `--in` fields are invalid or no aliases are cached.
/// - `SwaggerCliError::Cache` if every alias failed to load.
/// - Whatever `CacheManager::list_aliases` returns on failure.
async fn execute_all_aliases(args: &Args, robot_mode: bool) -> Result<(), SwaggerCliError> {
    let (_alias, query) = resolve_alias_and_query(args)?;
    let start = Instant::now();

    let (search_paths, search_descriptions, search_schemas) = match &args.in_fields {
        Some(fields) => parse_in_fields(fields)?,
        None => (true, true, true),
    };

    let cm = CacheManager::new(cache_dir());
    let alias_metas = cm.list_aliases()?;
    if alias_metas.is_empty() {
        return Err(SwaggerCliError::Usage(
            "No cached aliases found. Fetch a spec first with 'swagger-cli fetch'.".to_string(),
        ));
    }

    let opts = SearchOptions {
        search_paths,
        search_descriptions,
        search_schemas,
        case_sensitive: args.case_sensitive,
        exact: args.exact,
        limit: usize::MAX, // collect all, then limit after merge
    };

    let mut all_results: Vec<(String, SearchResult)> = Vec::new();
    let mut aliases_searched: Vec<String> = Vec::new();
    let mut warnings: Vec<String> = Vec::new();

    let mut sorted_aliases: Vec<_> = alias_metas.iter().map(|m| m.alias.clone()).collect();
    sorted_aliases.sort();

    for alias_name in &sorted_aliases {
        match cm.load_index(alias_name) {
            Ok((index, _meta)) => {
                aliases_searched.push(alias_name.clone());
                let engine = SearchEngine::new(&index);
                let results = engine.search(query, &opts);
                for r in results {
                    all_results.push((alias_name.clone(), r));
                }
            }
            Err(e) => {
                // A single broken alias must not hide results from the others.
                warnings.push(format!("Failed to load alias '{alias_name}': {e}"));
            }
        }
    }

    if aliases_searched.is_empty() {
        return Err(SwaggerCliError::Cache(
            "All aliases failed to load".to_string(),
        ));
    }

    // Sort by score DESC, then type ordinal ASC, name ASC, method_rank ASC, alias ASC
    all_results.sort_by(|(alias_a, a), (alias_b, b)| {
        b.score
            .cmp(&a.score)
            .then_with(|| a.result_type.ordinal().cmp(&b.result_type.ordinal()))
            .then_with(|| a.name.cmp(&b.name))
            .then_with(|| {
                let a_rank = a.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
                let b_rank = b.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
                a_rank.cmp(&b_rank)
            })
            .then_with(|| alias_a.cmp(alias_b))
    });

    // Apply limit
    all_results.truncate(args.limit);

    // Assign 1-based ranks; per-alias ranks are meaningless after the merge.
    for (i, (_alias, result)) in all_results.iter_mut().enumerate() {
        result.rank = i + 1;
    }

    let total = all_results.len();

    if robot_mode {
        let robot_results: Vec<AliasRobotResult> = all_results
            .iter()
            .map(|(alias, r)| AliasRobotResult {
                alias: alias.clone(),
                result_type: match r.result_type {
                    SearchResultType::Endpoint => "endpoint",
                    SearchResultType::Schema => "schema",
                },
                name: r.name.clone(),
                method: r.method.clone(),
                summary: r.summary.clone(),
                rank: r.rank,
                score: r.score,
                matches: r
                    .matches
                    .iter()
                    .map(|m| RobotMatch {
                        field: m.field.clone(),
                        snippet: m.snippet.clone(),
                    })
                    .collect(),
            })
            .collect();
        let output = AllAliasesRobotOutput {
            total,
            results: robot_results,
            aliases_searched,
            warnings,
        };
        robot::robot_success(output, "search", start.elapsed());
    } else {
        // Pluralise the alias noun the same way the result noun is pluralised.
        let alias_count = aliases_searched.len();
        let alias_noun = if alias_count == 1 { "alias" } else { "aliases" };

        if all_results.is_empty() {
            println!("No results found for '{query}' across {alias_count} {alias_noun}");
        } else {
            println!(
                "Found {} result{} for '{query}' across {alias_count} {alias_noun}:\n",
                total,
                if total == 1 { "" } else { "s" },
            );
            for (alias, r) in &all_results {
                let type_label = match r.result_type {
                    SearchResultType::Endpoint => "endpoint",
                    SearchResultType::Schema => "schema",
                };
                let method_str = r
                    .method
                    .as_deref()
                    .map(|m| format!("{m} "))
                    .unwrap_or_default();
                let summary_str = r
                    .summary
                    .as_deref()
                    .map(|s| format!(" - {s}"))
                    .unwrap_or_default();
                println!(
                    " {rank}. [{alias}] [{type_label}] {method_str}{name}{summary_str} (score: {score})",
                    rank = r.rank,
                    name = r.name,
                    score = r.score,
                );
            }
        }
        if !warnings.is_empty() {
            // The separator belongs to the warning block, so it goes to stderr
            // too; stdout then carries only the results when piped.
            eprintln!();
            for w in &warnings {
                eprintln!("Warning: {w}");
            }
        }
    }

    Ok(())
}

File diff suppressed because it is too large Load Diff

641
src/core/external_refs.rs Normal file
View File

@@ -0,0 +1,641 @@
use std::collections::HashSet;
use std::future::Future;
use std::pin::Pin;
use reqwest::Url;
use serde_json::Value;
use crate::core::http::AsyncHttpClient;
use crate::core::indexer::{detect_format, normalize_to_json};
use crate::errors::SwaggerCliError;
/// Configuration for external `$ref` resolution.
///
/// Derives `Debug` and `Clone` so the limits can be logged and reused across
/// several resolution runs.
#[derive(Debug, Clone)]
pub struct ExternalRefConfig {
    /// Hostnames that external refs may be fetched from. A ref whose URL host
    /// is not listed here aborts resolution with a policy error.
    pub allow_hosts: Vec<String>,
    /// Maximum chain depth for transitive external refs. Refs found at or
    /// beyond this depth are left unresolved and counted as skipped.
    pub max_depth: u32,
    /// Maximum total bytes fetched across all external refs.
    pub max_bytes: u64,
}
/// Statistics returned after resolving external refs.
///
/// Derives `PartialEq`/`Eq`/`Clone` so a whole result can be compared or
/// snapshotted in one step instead of field by field.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ResolveStats {
    /// Number of external refs that were fetched and inlined.
    pub refs_resolved: usize,
    /// Number of external refs left in place or replaced by a marker: circular
    /// refs, refs beyond the depth limit, and refs encountered after the byte
    /// budget was already used up.
    pub refs_skipped: usize,
    /// Sum of the body sizes of all fetched external documents, in bytes.
    pub total_bytes_fetched: u64,
}
/// Fetch and inline every external `$ref` reachable from `value`.
///
/// - Internal refs such as `#/components/...` are left untouched.
/// - A ref whose host is not in `config.allow_hosts` aborts with
///   `SwaggerCliError::PolicyBlocked`.
/// - A ref that points back at a document currently being resolved is
///   replaced with a `$circular_external_ref` marker.
/// - Refs at or beyond `config.max_depth` are left in place and counted as
///   skipped.
/// - A fetch that would push the running total over `config.max_bytes`
///   aborts with `SwaggerCliError::PolicyBlocked`.
///
/// `base_url` is the URL relative refs are resolved against; relative refs
/// without one are an error.
///
/// Returns the resolution statistics on success.
pub async fn resolve_external_refs(
    value: &mut Value,
    base_url: Option<&str>,
    config: &ExternalRefConfig,
    client: &AsyncHttpClient,
) -> Result<ResolveStats, SwaggerCliError> {
    let mut stats = ResolveStats::default();
    // URLs of the documents currently being expanded, for cycle detection.
    let mut in_progress: HashSet<String> = HashSet::new();

    let outcome = resolve_recursive(
        value,
        base_url,
        config,
        client,
        0,
        &mut in_progress,
        &mut stats,
    )
    .await;

    outcome.map(|()| stats)
}
/// Recursive worker that walks `value` and inlines external `$ref` nodes.
///
/// Returns a boxed future because an `async fn` cannot call itself directly.
///
/// * `base_url` – URL that relative refs in the current document resolve against.
/// * `depth` – number of external documents already followed to reach `value`.
/// * `visited` – URLs of the external refs currently being expanded (the
///   active chain), used for cycle detection.
/// * `stats` – running counters, updated in place.
///
/// A node carrying a string `$ref` is handled as a unit (its other keys are
/// not walked); every other object or array is traversed member by member.
///
/// # Errors
/// - `SwaggerCliError::InvalidSpec` for unresolvable or malformed ref URLs,
///   unparsable fetched content, or a fragment missing from the fetched document.
/// - `SwaggerCliError::PolicyBlocked` for a host outside the allowlist or a
///   fetch that would exceed `config.max_bytes`.
/// - Any error returned by `client.fetch_spec`.
fn resolve_recursive<'a>(
    value: &'a mut Value,
    base_url: Option<&'a str>,
    config: &'a ExternalRefConfig,
    client: &'a AsyncHttpClient,
    depth: u32,
    visited: &'a mut HashSet<String>,
    stats: &'a mut ResolveStats,
) -> Pin<Box<dyn Future<Output = Result<(), SwaggerCliError>> + Send + 'a>> {
    Box::pin(async move {
        if let Some(ref_str) = extract_external_ref(value) {
            // Same-document refs: leave untouched. Anything starting with `#`
            // is fragment-only (`#`, `#/pointer`, `#anchor`) and must never be
            // joined onto the base URL and fetched as if it were external.
            if ref_str.starts_with('#') {
                return Ok(());
            }

            // Resolve the URL (may be relative)
            let resolved_url = resolve_ref_url(&ref_str, base_url)?;

            // Check for cycles
            if visited.contains(&resolved_url) {
                *value = serde_json::json!({ "$circular_external_ref": resolved_url });
                stats.refs_skipped += 1;
                return Ok(());
            }

            // Check depth limit
            if depth >= config.max_depth {
                stats.refs_skipped += 1;
                return Ok(());
            }

            // Check host allowlist
            let parsed = Url::parse(&resolved_url).map_err(|e| {
                SwaggerCliError::InvalidSpec(format!(
                    "invalid external ref URL '{resolved_url}': {e}"
                ))
            })?;
            let host = parsed.host_str().ok_or_else(|| {
                SwaggerCliError::InvalidSpec(format!(
                    "external ref URL '{resolved_url}' has no host"
                ))
            })?;
            // Hostnames are case-insensitive, so `--ref-allow-host Example.COM`
            // must match a ref to `example.com`.
            if !config
                .allow_hosts
                .iter()
                .any(|h| h.eq_ignore_ascii_case(host))
            {
                return Err(SwaggerCliError::PolicyBlocked(format!(
                    "external ref host '{host}' is not in --ref-allow-host allowlist. \
                     Add --ref-allow-host {host} to allow fetching from this host."
                )));
            }

            // Check bytes limit before fetch
            if stats.total_bytes_fetched >= config.max_bytes {
                stats.refs_skipped += 1;
                return Ok(());
            }

            // Fetch the external ref
            let result = client.fetch_spec(&resolved_url).await?;
            let fetched_bytes = result.bytes.len() as u64;
            // saturating_add: an absurdly large running total must trip the
            // limit rather than wrap around it.
            if stats.total_bytes_fetched.saturating_add(fetched_bytes) > config.max_bytes {
                return Err(SwaggerCliError::PolicyBlocked(format!(
                    "external ref total bytes would exceed --ref-max-bytes limit of {}. \
                     Resolved {} refs ({} bytes) before hitting the limit.",
                    config.max_bytes, stats.refs_resolved, stats.total_bytes_fetched
                )));
            }
            stats.total_bytes_fetched += fetched_bytes;

            // Parse the fetched content as JSON or YAML
            let format = detect_format(
                &result.bytes,
                Some(&resolved_url),
                result.content_type.as_deref(),
            );
            let json_bytes = normalize_to_json(&result.bytes, format).map_err(|_| {
                SwaggerCliError::InvalidSpec(format!(
                    "external ref '{resolved_url}' returned invalid JSON/YAML"
                ))
            })?;
            let mut fetched_value: Value = serde_json::from_slice(&json_bytes)?;

            // Handle fragment pointer within the fetched document
            if let Some(frag) = parsed.fragment()
                && !frag.is_empty()
            {
                let pointer = if frag.starts_with('/') {
                    frag.to_string()
                } else {
                    format!("/{frag}")
                };
                fetched_value = crate::core::refs::resolve_json_pointer(&fetched_value, &pointer)
                    .cloned()
                    .ok_or_else(|| {
                        SwaggerCliError::InvalidSpec(format!(
                            "fragment '{pointer}' not found in external ref '{resolved_url}'"
                        ))
                    })?;
            }

            // Mark as visited for cycle detection, then recursively resolve nested external refs
            visited.insert(resolved_url.clone());

            // The base URL for nested refs is the URL of the document we just fetched (without fragment)
            let nested_base = strip_fragment(&resolved_url);
            resolve_recursive(
                &mut fetched_value,
                Some(&nested_base),
                config,
                client,
                depth + 1,
                visited,
                stats,
            )
            .await?;

            // Only the active chain counts as a cycle; once this ref is fully
            // expanded, sibling refs to the same URL may resolve it again.
            visited.remove(&resolved_url);

            *value = fetched_value;
            stats.refs_resolved += 1;
            return Ok(());
        }

        // Walk into objects and arrays
        match value {
            Value::Object(map) => {
                // Collect keys first to satisfy borrow checker with recursive async
                let keys: Vec<String> = map.keys().cloned().collect();
                for key in keys {
                    if let Some(val) = map.get_mut(&key) {
                        resolve_recursive(val, base_url, config, client, depth, visited, stats)
                            .await?;
                    }
                }
            }
            Value::Array(arr) => {
                for item in arr.iter_mut() {
                    resolve_recursive(item, base_url, config, client, depth, visited, stats)
                        .await?;
                }
            }
            _ => {}
        }

        Ok(())
    })
}
/// Extract the `$ref` string from a JSON object if present.
fn extract_external_ref(value: &Value) -> Option<String> {
let map = value.as_object()?;
let ref_val = map.get("$ref")?;
Some(ref_val.as_str()?.to_string())
}
/// Resolve a possibly-relative ref URL against a base URL.
///
/// A ref is treated as absolute, and returned unchanged, when `://` appears
/// before any query (`?`) or fragment (`#`) part. Everything else is joined
/// onto `base_url`.
///
/// # Errors
/// Returns `SwaggerCliError::InvalidSpec` when a relative ref is given without
/// a base URL, when the base URL does not parse, or when the join fails.
fn resolve_ref_url(ref_str: &str, base_url: Option<&str>) -> Result<String, SwaggerCliError> {
    // Look for the scheme separator only ahead of the query/fragment: a
    // relative ref such as `pet.json?src=http://x` embeds `://` but is still
    // relative and must be joined onto the base.
    let before_query_or_fragment = ref_str.split(['?', '#']).next().unwrap_or(ref_str);
    if before_query_or_fragment.contains("://") {
        return Ok(ref_str.to_string());
    }

    // Relative ref requires a base URL
    let base = base_url.ok_or_else(|| {
        SwaggerCliError::InvalidSpec(format!(
            "relative external ref '{ref_str}' cannot be resolved without a base URL"
        ))
    })?;

    let base_parsed = Url::parse(base)
        .map_err(|e| SwaggerCliError::InvalidSpec(format!("invalid base URL '{base}': {e}")))?;

    base_parsed
        .join(ref_str)
        .map(|u| u.to_string())
        .map_err(|e| {
            SwaggerCliError::InvalidSpec(format!(
                "failed to resolve relative ref '{ref_str}' against base '{base}': {e}"
            ))
        })
}
/// Return `url` without its fragment, i.e. everything before the first `#`.
///
/// A URL that contains no `#` is returned unchanged.
fn strip_fragment(url: &str) -> String {
    let without_fragment = url.split_once('#').map_or(url, |(head, _fragment)| head);
    without_fragment.to_owned()
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// -- URL resolution -------------------------------------------------------
#[test]
fn test_resolve_absolute_ref() {
let result = resolve_ref_url("https://example.com/schemas/Pet.json", None).unwrap();
assert_eq!(result, "https://example.com/schemas/Pet.json");
}
#[test]
fn test_resolve_relative_ref() {
let result = resolve_ref_url(
"./schemas/Pet.json",
Some("https://example.com/api/spec.json"),
)
.unwrap();
assert_eq!(result, "https://example.com/api/schemas/Pet.json");
}
#[test]
fn test_resolve_relative_parent() {
let result = resolve_ref_url(
"../schemas/Pet.json",
Some("https://example.com/api/v1/spec.json"),
)
.unwrap();
assert_eq!(result, "https://example.com/api/schemas/Pet.json");
}
#[test]
fn test_resolve_relative_without_base_fails() {
let result = resolve_ref_url("./schemas/Pet.json", None);
assert!(result.is_err());
}
// -- Fragment stripping ---------------------------------------------------
#[test]
fn test_strip_fragment() {
assert_eq!(
strip_fragment("https://example.com/spec.json#/components/schemas/Pet"),
"https://example.com/spec.json"
);
assert_eq!(
strip_fragment("https://example.com/spec.json"),
"https://example.com/spec.json"
);
}
// -- External ref extraction ----------------------------------------------
#[test]
fn test_extract_external_ref_present() {
let v = json!({"$ref": "https://example.com/Pet.json"});
assert_eq!(
extract_external_ref(&v),
Some("https://example.com/Pet.json".to_string())
);
}
#[test]
fn test_extract_external_ref_internal() {
let v = json!({"$ref": "#/components/schemas/Pet"});
assert_eq!(
extract_external_ref(&v),
Some("#/components/schemas/Pet".to_string())
);
}
#[test]
fn test_extract_external_ref_absent() {
let v = json!({"type": "string"});
assert_eq!(extract_external_ref(&v), None);
}
// -- Integration tests with mockito ---------------------------------------
#[tokio::test]
async fn test_resolve_external_ref_allowed_host() {
let mut server = mockito::Server::new_async().await;
let host = server.host_with_port();
let pet_schema = json!({
"type": "object",
"properties": {
"name": { "type": "string" }
}
});
let _mock = server
.mock("GET", "/schemas/Pet.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&pet_schema).unwrap())
.create_async()
.await;
let spec_url = format!("http://{host}/api/spec.json");
let ref_url = format!("http://{host}/schemas/Pet.json");
let mut value = json!({
"openapi": "3.0.3",
"components": {
"schemas": {
"Pet": { "$ref": ref_url }
}
}
});
let hostname = host.split(':').next().unwrap().to_string();
let config = ExternalRefConfig {
allow_hosts: vec![hostname.clone()],
max_depth: 5,
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder()
.allow_insecure_http(true)
.allowed_private_hosts(vec![hostname.clone()])
.build();
let stats = resolve_external_refs(&mut value, Some(&spec_url), &config, &client)
.await
.unwrap();
assert_eq!(stats.refs_resolved, 1);
assert_eq!(value["components"]["schemas"]["Pet"]["type"], "object");
assert_eq!(
value["components"]["schemas"]["Pet"]["properties"]["name"]["type"],
"string"
);
}
#[tokio::test]
async fn test_resolve_external_ref_disallowed_host() {
let mut value = json!({
"schema": { "$ref": "https://evil.example.com/schemas/Pet.json" }
});
let config = ExternalRefConfig {
allow_hosts: vec!["safe.example.com".to_string()],
max_depth: 5,
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder().build();
let result = resolve_external_refs(&mut value, None, &config, &client).await;
assert!(result.is_err());
match result.unwrap_err() {
SwaggerCliError::PolicyBlocked(msg) => {
assert!(msg.contains("evil.example.com"));
assert!(msg.contains("--ref-allow-host"));
}
other => panic!("expected PolicyBlocked, got: {other:?}"),
}
}
#[tokio::test]
async fn test_resolve_internal_refs_untouched() {
let mut value = json!({
"schema": { "$ref": "#/components/schemas/Pet" }
});
let config = ExternalRefConfig {
allow_hosts: vec![],
max_depth: 5,
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder().build();
let stats = resolve_external_refs(&mut value, None, &config, &client)
.await
.unwrap();
assert_eq!(stats.refs_resolved, 0);
assert_eq!(value["schema"]["$ref"], "#/components/schemas/Pet");
}
#[tokio::test]
async fn test_resolve_max_depth_limits_chains() {
let mut server = mockito::Server::new_async().await;
let host = server.host_with_port();
// Chain: spec -> A.json -> B.json
// With max_depth=1, only A.json should be resolved
let b_schema = json!({
"type": "string",
"from": "B"
});
let _mock_b = server
.mock("GET", "/B.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&b_schema).unwrap())
.create_async()
.await;
let a_schema = json!({
"type": "object",
"nested": { "$ref": format!("http://{host}/B.json") }
});
let _mock_a = server
.mock("GET", "/A.json")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(serde_json::to_string(&a_schema).unwrap())
.create_async()
.await;
let ref_url = format!("http://{host}/A.json");
let mut value = json!({
"schema": { "$ref": ref_url }
});
let hostname = host.split(':').next().unwrap().to_string();
let config = ExternalRefConfig {
allow_hosts: vec![hostname.clone()],
max_depth: 1, // Only one level deep
max_bytes: 1_048_576,
};
let client = AsyncHttpClient::builder()
.allow_insecure_http(true)
.allowed_private_hosts(vec![hostname.clone()])
.build();
let stats = resolve_external_refs(&mut value, None, &config, &client)
.await
.unwrap();
// A was resolved (depth 0), B was skipped (depth 1 >= max_depth 1)
assert_eq!(stats.refs_resolved, 1);
assert_eq!(stats.refs_skipped, 1);
assert_eq!(value["schema"]["type"], "object");
}
/// Two remote documents that reference each other (A -> B -> A) must not
/// recurse without bound: the test expects both documents to be inlined once
/// and the back-reference to A to be replaced by a `$circular_external_ref`
/// marker and counted as skipped.
#[tokio::test]
async fn test_resolve_circular_external_refs() {
    let mut server = mockito::Server::new_async().await;
    let host = server.host_with_port();

    let a_url = format!("http://{host}/A.json");
    let b_url = format!("http://{host}/B.json");

    // B.json points back at A.json, closing the cycle.
    let b_body = serde_json::to_string(&json!({
        "type": "object",
        "back": { "$ref": a_url.as_str() }
    }))
    .unwrap();
    let _b_mock = server
        .mock("GET", "/B.json")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(b_body)
        .create_async()
        .await;

    // A.json points forward at B.json.
    let a_body = serde_json::to_string(&json!({
        "type": "object",
        "next": { "$ref": b_url.as_str() }
    }))
    .unwrap();
    let _a_mock = server
        .mock("GET", "/A.json")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(a_body)
        .create_async()
        .await;

    // The document under test enters the cycle through A.json.
    let mut value = json!({
        "schema": { "$ref": a_url.as_str() }
    });

    // Only the host part (no port) goes into the allow-lists.
    let allowed = vec![host.split(':').next().unwrap().to_owned()];
    let config = ExternalRefConfig {
        allow_hosts: allowed.clone(),
        max_depth: 10, // generous, so the depth limit is not what stops the cycle
        max_bytes: 1_048_576,
    };
    let client = AsyncHttpClient::builder()
        .allow_insecure_http(true)
        .allowed_private_hosts(allowed)
        .build();

    let stats = resolve_external_refs(&mut value, None, &config, &client)
        .await
        .unwrap();

    // A and B are each resolved once; the back-ref to A is the single skip.
    assert_eq!(stats.refs_resolved, 2);
    assert_eq!(stats.refs_skipped, 1);
    assert!(
        value["schema"]["next"]["back"]
            .get("$circular_external_ref")
            .is_some()
    );
}
/// A remote document larger than `max_bytes` must make resolution fail with
/// `SwaggerCliError::PolicyBlocked`, and the message must mention the
/// `--ref-max-bytes` flag.
#[tokio::test]
async fn test_resolve_max_bytes_exceeded() {
    let mut server = mockito::Server::new_async().await;
    let host = server.host_with_port();

    // Body of a bit more than 500 bytes, well over the 100-byte limit below.
    let large_body = "x".repeat(500);
    let payload = format!("{{\"data\": \"{large_body}\"}}");
    let _big_mock = server
        .mock("GET", "/big.json")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(payload)
        .create_async()
        .await;

    let mut value = json!({
        "schema": { "$ref": format!("http://{host}/big.json") }
    });

    // Only the host part (no port) goes into the allow-lists.
    let allowed = vec![host.split(':').next().unwrap().to_owned()];
    let config = ExternalRefConfig {
        allow_hosts: allowed.clone(),
        max_depth: 5,
        max_bytes: 100, // deliberately tiny so the mock response exceeds it
    };
    let client = AsyncHttpClient::builder()
        .allow_insecure_http(true)
        .allowed_private_hosts(allowed)
        .build();

    let result = resolve_external_refs(&mut value, None, &config, &client).await;
    assert!(result.is_err());

    let err = result.unwrap_err();
    if let SwaggerCliError::PolicyBlocked(msg) = &err {
        assert!(msg.contains("--ref-max-bytes"));
    } else {
        panic!("expected PolicyBlocked, got: {err:?}");
    }
}
/// End-to-end check for a relative `$ref`: the spec is declared to live at
/// `/api/spec.json`, the ref is `./schemas/Pet.json`, and the only mocked
/// route is `/api/schemas/Pet.json`. The test expects exactly one resolved
/// ref and the Pet schema inlined in place of the `$ref`.
#[tokio::test]
async fn test_resolve_relative_ref_integration() {
    let mut server = mockito::Server::new_async().await;
    let host = server.host_with_port();

    let pet_body = serde_json::to_string(&json!({
        "type": "object",
        "properties": {
            "name": { "type": "string" }
        }
    }))
    .unwrap();
    let _pet_mock = server
        .mock("GET", "/api/schemas/Pet.json")
        .with_status(200)
        .with_header("content-type", "application/json")
        .with_body(pet_body)
        .create_async()
        .await;

    // Base URL handed to the resolver alongside the relative ref.
    let base_url = format!("http://{host}/api/spec.json");
    let mut value = json!({
        "schema": { "$ref": "./schemas/Pet.json" }
    });

    // Only the host part (no port) goes into the allow-lists.
    let allowed = vec![host.split(':').next().unwrap().to_owned()];
    let config = ExternalRefConfig {
        allow_hosts: allowed.clone(),
        max_depth: 5,
        max_bytes: 1_048_576,
    };
    let client = AsyncHttpClient::builder()
        .allow_insecure_http(true)
        .allowed_private_hosts(allowed)
        .build();

    let stats = resolve_external_refs(&mut value, Some(&base_url), &config, &client)
        .await
        .unwrap();

    assert_eq!(stats.refs_resolved, 1);
    assert_eq!(value["schema"]["type"], "object");
}
}

View File

@@ -1,6 +1,7 @@
pub mod cache;
pub mod config;
pub mod diff;
pub mod external_refs;
pub mod http;
pub mod indexer;
pub mod network;

View File

@@ -63,10 +63,12 @@ fn expand_recursive(
let pointer = &ref_str[1..]; // strip leading '#'
if let Some(resolved) = resolve_json_pointer(root, pointer) {
let mut expanded = resolved.clone();
visited.insert(ref_str);
visited.insert(ref_str.clone());
expand_recursive(&mut expanded, root, max_depth, depth + 1, visited);
// Do not remove from visited: keep it for sibling detection within the same
// subtree path. The caller manages the visited set across siblings.
// Remove after expansion so sibling subtrees can also expand this ref.
// The ancestor path (tracked via depth) still prevents true circular refs
// because the ref is in `visited` during its own subtree's expansion.
visited.remove(&ref_str);
*value = expanded;
}
// If pointer doesn't resolve, leave the $ref as-is (broken ref)
@@ -292,4 +294,58 @@ mod tests {
// Broken internal ref left untouched
assert_eq!(value, original);
}
/// Two sibling subtrees pointing at the same schema are shared references,
/// not a cycle: both must be fully expanded and neither may carry a
/// `$circular_ref` marker.
#[test]
fn test_expand_shared_refs_both_expand() {
    const PET_REF: &str = "#/components/schemas/Pet";

    let root = json!({
        "components": {
            "schemas": {
                "Pet": {
                    "type": "object",
                    "properties": {
                        "name": { "type": "string" }
                    }
                }
            }
        }
    });

    // Same target referenced from two independent branches.
    let mut value = json!({
        "requestBody": {
            "schema": { "$ref": PET_REF }
        },
        "response": {
            "schema": { "$ref": PET_REF }
        }
    });

    expand_refs(&mut value, &root, 5);

    let sites = ["requestBody", "response"];

    // Every site must contain the full Pet schema.
    for &site in &sites {
        let schema = &value[site]["schema"];
        assert_eq!(schema["type"], "object");
        assert_eq!(schema["properties"]["name"]["type"], "string");
    }

    // No site may have been flagged as circular.
    for &site in &sites {
        assert!(
            value[site]["schema"].get("$circular_ref").is_none(),
            "{site} ref should not be marked circular"
        );
    }
}
}

View File

@@ -26,7 +26,7 @@ pub enum SearchResultType {
}
impl SearchResultType {
fn ordinal(self) -> u8 {
pub(crate) fn ordinal(self) -> u8 {
match self {
Self::Endpoint => 0,
Self::Schema => 1,