sppd_cli/parser/
cleanup.rs

1use crate::errors::AppResult;
2use std::collections::BTreeMap;
3use tracing::{info, warn};
4
5/// Deletes ZIP files and extracted directories after processing.
6///
7/// This function removes temporary files created during the download and extraction
8/// phases, keeping only the final Parquet files. It's typically called after
9/// successful parsing to free up disk space.
10///
11/// # Behavior
12///
13/// For each period in `target_links`, this function:
14/// - Deletes the ZIP file: `{extract_dir}/{period}.zip`
15/// - Deletes the extracted directory: `{extract_dir}/{period}/` (recursively removes all XML/Atom files)
16///
17/// # Arguments
18///
19/// * `target_links` - Map of period strings to URLs (determines which files to delete)
20/// * `procurement_type` - Procurement type determining the extraction directory
21/// * `should_cleanup` - If `false`, the function returns immediately without deleting anything
22/// * `config` - Resolved configuration containing directory paths
23///
24/// # Error Handling
25///
26/// Individual deletion errors are logged as warnings but do not fail the entire operation.
27/// The function continues processing remaining files even if some deletions fail.
28pub async fn cleanup_files(
29    target_links: &BTreeMap<String, String>,
30    procurement_type: &crate::models::ProcurementType,
31    should_cleanup: bool,
32    config: &crate::config::ResolvedConfig,
33) -> AppResult<()> {
34    if !should_cleanup {
35        info!("Cleanup skipped (--cleanup=no)");
36        return Ok(());
37    }
38
39    let extract_dir = procurement_type.extract_dir(config);
40    if !extract_dir.exists() {
41        info!("Extract directory does not exist, skipping cleanup");
42        return Ok(());
43    }
44
45    info!("Starting cleanup phase");
46
47    let mut zip_deleted = 0;
48    let mut zip_errors = 0;
49    let mut dir_deleted = 0;
50    let mut dir_errors = 0;
51
52    for period in target_links.keys() {
53        // Delete ZIP file
54        let zip_path = extract_dir.join(format!("{period}.zip"));
55        if zip_path.exists() {
56            match tokio::fs::remove_file(&zip_path).await {
57                Ok(_) => {
58                    zip_deleted += 1;
59                }
60                Err(e) => {
61                    zip_errors += 1;
62                    warn!(
63                        zip_file = %zip_path.display(),
64                        period = period,
65                        error = %e,
66                        "Failed to delete ZIP file"
67                    );
68                }
69            }
70        }
71
72        // Delete extracted directory (contains XML/Atom files)
73        let extract_dir_path = extract_dir.join(period);
74        if extract_dir_path.exists() {
75            match tokio::fs::remove_dir_all(&extract_dir_path).await {
76                Ok(_) => {
77                    dir_deleted += 1;
78                }
79                Err(e) => {
80                    dir_errors += 1;
81                    warn!(
82                        extract_dir = %extract_dir_path.display(),
83                        period = period,
84                        error = %e,
85                        "Failed to delete extracted directory"
86                    );
87                }
88            }
89        }
90    }
91
92    info!(
93        zip_deleted = zip_deleted,
94        zip_errors = zip_errors,
95        dir_deleted = dir_deleted,
96        dir_errors = dir_errors,
97        "Cleanup completed"
98    );
99
100    Ok(())
101}