sppd_cli/parser/
file_finder.rs

1use crate::errors::{AppError, AppResult};
2use std::path::PathBuf;
3
4/// Finds all XML/Atom files organized by subdirectory.
5///
6/// This function scans the immediate subdirectories of the given path and
7/// recursively collects all `.xml` and `.atom` files within each subdirectory.
8/// Files in the top-level directory are ignored.
9///
10/// # Returns
11///
12/// Returns a vector of tuples where:
13/// - First element: Subdirectory name (e.g., "202301")
14/// - Second element: Vector of paths to XML/Atom files found in that subdirectory
15///
16/// Only subdirectories containing at least one XML/Atom file are included.
17///
18/// # Arguments
19///
20/// * `path` - Base directory to search (typically the extraction directory)
21///
22/// # Errors
23///
24/// Returns an error if directory reading fails.
25pub fn find_xmls(path: &std::path::Path) -> AppResult<Vec<(String, Vec<PathBuf>)>> {
26    // Pre-allocate with conservative estimate (usually 1-100 subdirectories)
27    let mut out = Vec::with_capacity(50);
28
29    for subdir in std::fs::read_dir(path).map_err(AppError::from)? {
30        let subdir = subdir.map_err(AppError::from)?;
31        let file_type = subdir.file_type().map_err(AppError::from)?;
32        if file_type.is_dir() {
33            let subdir_path = subdir.path();
34            let files = collect_xmls(&subdir_path);
35            if !files.is_empty() {
36                let name = subdir_path
37                    .file_name()
38                    .and_then(|n| n.to_str())
39                    .unwrap_or("")
40                    .to_string();
41                out.push((name, files));
42            }
43        }
44    }
45
46    Ok(out)
47}
48
49/// Recursively collects `.xml` or `.atom` files in a directory (including subdirs).
50pub(crate) fn collect_xmls(dir: &std::path::Path) -> Vec<PathBuf> {
51    // Pre-allocate with conservative estimate (usually 1-20 XML files per directory)
52    let mut v = Vec::with_capacity(20);
53    let walker = walkdir::WalkDir::new(dir).into_iter();
54    for entry in walker.flatten() {
55        if entry.file_type().is_file() {
56            if let Some(ext) = entry.path().extension().and_then(|e| e.to_str()) {
57                if ext.eq_ignore_ascii_case("xml") || ext.eq_ignore_ascii_case("atom") {
58                    v.push(entry.path().to_path_buf());
59                }
60            }
61        }
62    }
63    v
64}
65
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;
    use tempfile::TempDir;

    /// Payload written into every XML/Atom fixture file.
    const FEED: &str = "<feed></feed>";

    /// Writes `content` to `path`, creating any missing parent directories first.
    fn create_test_xml_file(path: &std::path::Path, content: &str) {
        fs::create_dir_all(path.parent().unwrap()).unwrap();
        let mut file = fs::File::create(path).unwrap();
        file.write_all(content.as_bytes()).unwrap();
    }

    /// Creates a fresh temporary directory containing an empty directory `name`.
    ///
    /// The returned `TempDir` guard must be kept alive for as long as the
    /// directory is in use; dropping it removes the temporary tree.
    fn make_base_dir(name: &str) -> (TempDir, std::path::PathBuf) {
        let guard = TempDir::new().unwrap();
        let base_dir = guard.path().join(name);
        fs::create_dir_all(&base_dir).unwrap();
        (guard, base_dir)
    }

    #[test]
    fn test_collect_xmls_recursive() {
        let (_guard, base_dir) = make_base_dir("base");

        // Nested layout: base/subdir/nested
        let subdir = base_dir.join("subdir");
        fs::create_dir_all(&subdir).unwrap();
        fs::create_dir_all(subdir.join("nested")).unwrap();

        // XML and ATOM files at several depths, plus one file that must not match.
        let fixtures = [
            (base_dir.join("file1.xml"), FEED),
            (subdir.join("file2.xml"), FEED),
            (subdir.join("nested/file3.atom"), FEED),
            (base_dir.join("file.txt"), "not xml"),
            (base_dir.join("file.XML"), FEED),
            (base_dir.join("file.ATOM"), FEED),
        ];
        for (path, content) in &fixtures {
            create_test_xml_file(path, content);
        }

        let files = collect_xmls(&base_dir);

        // file1.xml, file2.xml, file3.atom, file.XML, file.ATOM
        assert_eq!(files.len(), 5);
        for expected in ["file1.xml", "file2.xml", "file3.atom", "file.XML", "file.ATOM"] {
            assert!(
                files.iter().any(|p| p.ends_with(expected)),
                "expected {expected} to be collected"
            );
        }
        assert!(files.iter().all(|p| !p.ends_with("file.txt")));
    }

    #[test]
    fn test_collect_xmls_case_insensitive() {
        let (_guard, base_dir) = make_base_dir("base");

        let names = [
            "lower.xml",
            "UPPER.XML",
            "Mixed.Xml",
            "lower.atom",
            "UPPER.ATOM",
            "Mixed.Atom",
        ];
        for name in names {
            create_test_xml_file(&base_dir.join(name), FEED);
        }

        assert_eq!(collect_xmls(&base_dir).len(), 6);
    }

    #[test]
    fn test_find_xmls_with_subdirectories() {
        let (_guard, base_dir) = make_base_dir("extract");

        // Two period-style subdirectories
        let subdir1 = base_dir.join("202301");
        let subdir2 = base_dir.join("202302");
        fs::create_dir_all(&subdir1).unwrap();
        fs::create_dir_all(&subdir2).unwrap();

        // Matching files inside the subdirectories
        create_test_xml_file(&subdir1.join("file1.xml"), FEED);
        create_test_xml_file(&subdir1.join("file2.xml"), FEED);
        create_test_xml_file(&subdir2.join("file1.atom"), FEED);

        // Non-XML file (should be ignored)
        create_test_xml_file(&subdir2.join("file.txt"), "text");

        // File at top level (should be ignored)
        create_test_xml_file(&base_dir.join("top.xml"), FEED);

        let result = find_xmls(&base_dir).unwrap();
        assert_eq!(result.len(), 2);

        // Looks up a subdirectory by name and reports how many files it holds.
        let file_count_for = |wanted: &str| {
            result
                .iter()
                .find(|(name, _)| name == wanted)
                .map(|(_, files)| files.len())
        };
        assert_eq!(file_count_for("202301"), Some(2));
        assert_eq!(file_count_for("202302"), Some(1));
    }

    #[test]
    fn test_find_xmls_empty_directories() {
        let (_guard, base_dir) = make_base_dir("extract");

        // A subdirectory with nothing in it
        fs::create_dir_all(base_dir.join("empty")).unwrap();

        // A subdirectory holding only non-XML files
        let no_xml_dir = base_dir.join("no_xml");
        fs::create_dir_all(&no_xml_dir).unwrap();
        create_test_xml_file(&no_xml_dir.join("file.txt"), "text");

        let result = find_xmls(&base_dir).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_find_xmls_nested_structure() {
        let (_guard, base_dir) = make_base_dir("extract");

        let subdir = base_dir.join("202301");
        fs::create_dir_all(&subdir).unwrap();
        fs::create_dir_all(subdir.join("level1/level2")).unwrap();

        for relative in ["file1.xml", "level1/file2.xml", "level1/level2/file3.atom"] {
            create_test_xml_file(&subdir.join(relative), FEED);
        }

        let result = find_xmls(&base_dir).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].1.len(), 3);
    }
}