sppd_cli/
config.rs

1use crate::errors::{AppError, AppResult};
2use serde::Deserialize;
3use std::fs;
4use std::path::{Path, PathBuf};
5
6/// Resolved configuration with all values filled in (no Options).
7///
8/// This struct represents the pipeline defaults and can be deserialized by the TOML
9/// loader. All fields have concrete values, making it safe to access directly without unwrapping.
10#[derive(Debug, Clone, Deserialize)]
11#[serde(default, deny_unknown_fields)]
12pub struct ResolvedConfig {
13    // Paths
14    pub download_dir_mc: PathBuf,
15    pub download_dir_pt: PathBuf,
16    pub parquet_dir_mc: PathBuf,
17    pub parquet_dir_pt: PathBuf,
18
19    // Processing
20    /// Number of XML files processed per chunk during parsing.
21    /// This also bounds the peak in-memory DataFrame size.
22    pub batch_size: usize,
23    /// Number of concurrent XML file reads during parsing.
24    pub read_concurrency: usize,
25    /// Whether to concatenate per-batch parquet files into a single period file.
26    pub concat_batches: bool,
27    pub max_retries: u32,
28    pub retry_initial_delay_ms: u64,
29    pub retry_max_delay_ms: u64,
30
31    // Downloads
32    pub concurrent_downloads: usize,
33}
34
35impl Default for ResolvedConfig {
36    fn default() -> Self {
37        Self {
38            download_dir_mc: PathBuf::from("data/tmp/mc"),
39            download_dir_pt: PathBuf::from("data/tmp/pt"),
40            parquet_dir_mc: PathBuf::from("data/parquet/mc"),
41            parquet_dir_pt: PathBuf::from("data/parquet/pt"),
42            batch_size: 150,
43            read_concurrency: 16,
44            concat_batches: false,
45            max_retries: 3,
46            retry_initial_delay_ms: 1000,
47            retry_max_delay_ms: 10000,
48            concurrent_downloads: 4,
49        }
50    }
51}
52
53/// Configuration that can be loaded from a TOML file.
54#[derive(Debug, Clone, Deserialize)]
55#[serde(deny_unknown_fields)]
56pub struct ResolvedConfigFile {
57    #[serde(rename = "type")]
58    pub procurement_type: String,
59    pub start: String,
60    pub end: String,
61    #[serde(default = "default_cleanup")]
62    pub cleanup: bool,
63    #[serde(flatten)]
64    pub resolved: ResolvedConfig,
65}
66
67impl ResolvedConfigFile {
68    pub fn from_toml_file(path: &Path) -> AppResult<Self> {
69        let contents = fs::read_to_string(path)?;
70        let config: ResolvedConfigFile = toml::from_str(&contents)
71            .map_err(|e| AppError::InvalidInput(format!("Failed to parse config: {e}")))?;
72
73        if config.resolved.batch_size == 0 {
74            return Err(AppError::InvalidInput(
75                "Batch size must be greater than 0".into(),
76            ));
77        }
78        if config.resolved.read_concurrency == 0 {
79            return Err(AppError::InvalidInput(
80                "Read concurrency must be greater than 0".into(),
81            ));
82        }
83
84        Ok(config)
85    }
86}
87
/// Serde default provider for the `cleanup` key: cleanup stays enabled unless the
/// configuration file sets it explicitly.
fn default_cleanup() -> bool {
    const CLEANUP_WHEN_UNSPECIFIED: bool = true;
    CLEANUP_WHEN_UNSPECIFIED
}
91
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding exactly `body`.
    ///
    /// The returned handle must stay alive for as long as its path is used,
    /// because the file is removed when the handle is dropped.
    fn temp_config(body: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(body.as_bytes()).unwrap();
        file
    }

    /// The built-in defaults expose the documented processing and download values.
    #[test]
    fn default_config_values() {
        let ResolvedConfig {
            batch_size,
            read_concurrency,
            concat_batches,
            concurrent_downloads,
            ..
        } = ResolvedConfig::default();

        assert_eq!(batch_size, 150);
        assert_eq!(read_concurrency, 16);
        assert!(!concat_batches);
        assert_eq!(concurrent_downloads, 4);
    }

    /// A file with only the required keys parses, and every other setting is defaulted.
    #[test]
    fn minimal_toml_is_parsed_and_defaults_apply() {
        let tmp = temp_config(
            r#"
            type = "mc"
            start = "202301"
            end = "202312"
            "#,
        );

        let parsed = ResolvedConfigFile::from_toml_file(tmp.path()).unwrap();

        // Required keys come straight from the file.
        assert_eq!(parsed.procurement_type, "mc");
        assert_eq!(parsed.start, "202301");
        assert_eq!(parsed.end, "202312");

        // Everything else falls back to its default.
        assert!(parsed.cleanup);
        assert_eq!(parsed.resolved.max_retries, 3);
        assert_eq!(parsed.resolved.concurrent_downloads, 4);
    }

    /// Leaving out a required key (`end`) is a parse error.
    #[test]
    fn missing_required_toml_field_errors() {
        let tmp = temp_config(
            r#"
            type = "pt"
            start = "202301"
            "#,
        );

        let outcome = ResolvedConfigFile::from_toml_file(tmp.path());
        assert!(outcome.is_err());
    }

    /// A key that no struct field accepts is rejected rather than ignored.
    #[test]
    fn unknown_key_errors() {
        let tmp = temp_config(
            r#"
            type = "pt"
            start = "202301"
            end = "202302"
            extra_flag = true
            "#,
        );

        let outcome = ResolvedConfigFile::from_toml_file(tmp.path());
        assert!(outcome.is_err());
    }
}