initiative_core/utils/
quoted_word_iter.rs

1use super::Substr;
2use std::str::CharIndices;
3
4/// Iterate through all words in the input, treating multiple words surrounded by quotation marks
5/// as a single word. Returns [`Substr`] objects, preserving the word's context within the larger
6/// string.
7///
8/// # Examples
9///
10/// ```
11/// # use initiative_core::utils::quoted_words;
12/// let mut iter = quoted_words(r#"   Ronny  "Two Spoons" Johnson  "#)
13///     .map(|substr| substr.as_str());
14///
15/// assert_eq!(Some("Ronny"), iter.next());
16/// assert_eq!(Some("Two Spoons"), iter.next());
17/// assert_eq!(Some("Johnson"), iter.next());
18/// assert_eq!(None, iter.next());
19/// ```
20///
21/// ## Interacting with the [`Substr`] object
22///
23/// ```
24/// # use initiative_core::utils::quoted_words;
25/// let mut iter = quoted_words(r#"   Ronny  "Two Spoons" Johnson  "#);
26/// let word = iter.nth(1).unwrap();
27///
28/// assert_eq!("Two Spoons", word.as_str());
29/// assert_eq!(r#""Two Spoons""#, word.as_outer_str());
30/// assert_eq!(" Johnson  ", word.after().as_str());
31/// assert_eq!(r#"   Ronny  "Two Spoons" Johnson  "#, word.as_original_str());
32/// ```
33pub fn quoted_words<'a, W>(phrase: W) -> impl Iterator<Item = Substr<'a>>
34where
35    W: Into<Substr<'a>>,
36{
37    QuotedWordIter::new(phrase.into())
38}
39
40/// Iterate through the possible phrases in the input (always starting from the first word). In the
41/// event that the first word is quoted per [`quoted_words`], the first result will be the contents
42/// of the quotes, but subsequent results will include the quotes as part of a larger phrase.
43///
44/// Also like `quoted_words`, the returned values are [`Substr`] objects, which can be cast back to
45/// `&str` using [`Substr::as_str`].
46///
47/// # Examples
48///
49/// ```
50/// # use initiative_core::utils::quoted_phrases;
51/// let mut iter = quoted_phrases(r#"  "Medium" Dave Lilywhite  "#)
52///     .map(|substr| substr.as_str());
53///
54/// assert_eq!(Some("Medium"), iter.next());
55/// assert_eq!(Some(r#""Medium" Dave"#), iter.next());
56/// assert_eq!(Some(r#""Medium" Dave Lilywhite"#), iter.next());
57/// assert_eq!(None, iter.next());
58/// ```
59pub fn quoted_phrases<'a, W>(phrase: W) -> impl Iterator<Item = Substr<'a>>
60where
61    W: Into<Substr<'a>>,
62{
63    let mut iter = quoted_words(phrase);
64    let first = iter.next();
65    let start = first.as_ref().map(|f| f.range().start).unwrap_or(0);
66
67    first.into_iter().chain(iter.map(move |substr| {
68        Substr::new(
69            substr.as_original_str(),
70            start..substr.range().end,
71            start..substr.range().end,
72        )
73    }))
74}
75
76pub struct QuotedWordIter<'a> {
77    phrase: Substr<'a>,
78    char_iter: CharIndices<'a>,
79    quote_len: Option<usize>,
80}
81
82impl<'a> QuotedWordIter<'a> {
83    fn new(phrase: Substr<'a>) -> Self {
84        Self {
85            char_iter: phrase.as_original_str().char_indices(),
86            phrase,
87            quote_len: None,
88        }
89    }
90}
91
92impl<'a> Iterator for QuotedWordIter<'a> {
93    type Item = Substr<'a>;
94
95    fn next(&mut self) -> Option<Self::Item> {
96        let (quote_char, first_index) = if let Some(quote_len) = self.quote_len {
97            self.quote_len = None;
98
99            if let Some((i, c)) = self.char_iter.next() {
100                if c == '"' {
101                    return Some(Substr::new(
102                        self.phrase.as_original_str(),
103                        i..i,
104                        i - quote_len..i + c.len_utf8(),
105                    ));
106                } else {
107                    (
108                        self.phrase.as_original_str()[i - quote_len..i]
109                            .chars()
110                            .next(),
111                        i,
112                    )
113                }
114            } else {
115                let original_str = self.phrase.as_original_str();
116                return Some(Substr::new(
117                    original_str,
118                    original_str.len()..original_str.len(),
119                    original_str.len() - quote_len..original_str.len(),
120                ));
121            }
122        } else {
123            let (first_index, first_char) = loop {
124                if let Some((i, c)) = self.char_iter.next() {
125                    if !c.is_whitespace() {
126                        break (i, c);
127                    }
128                } else {
129                    return None;
130                }
131            };
132
133            if first_char == '"' {
134                if let Some((i, c)) = self.char_iter.next() {
135                    if c == '"' {
136                        // Empty quotes = yield empty string
137                        return Some(Substr::new(
138                            self.phrase.as_original_str(),
139                            i..i,
140                            i - first_char.len_utf8()..i + c.len_utf8(),
141                        ));
142                    } else {
143                        (Some(first_char), i)
144                    }
145                } else {
146                    let original_str = self.phrase.as_original_str();
147                    return Some(Substr::new(
148                        original_str,
149                        original_str.len()..original_str.len(),
150                        first_index..original_str.len(),
151                    ));
152                }
153            } else {
154                (None, first_index)
155            }
156        };
157
158        let last_index = loop {
159            if let Some((i, c)) = self.char_iter.next() {
160                if let Some(quote_char) = quote_char {
161                    if c == '"' {
162                        return Some(Substr::new(
163                            self.phrase.as_original_str(),
164                            first_index..i,
165                            first_index - quote_char.len_utf8()..i + c.len_utf8(),
166                        ));
167                    }
168                } else if c == '"' {
169                    self.quote_len = Some(c.len_utf8());
170                    break i;
171                } else if c.is_whitespace() {
172                    break i;
173                }
174            } else if let Some(quote_char) = quote_char {
175                let original_str = self.phrase.as_original_str();
176                return Some(Substr::new(
177                    original_str,
178                    first_index..original_str.len(),
179                    first_index - quote_char.len_utf8()..original_str.len(),
180                ));
181            } else {
182                break self.phrase.as_original_str().len();
183            }
184        };
185
186        Some(Substr::new(
187            self.phrase.as_original_str(),
188            first_index..last_index,
189            first_index..last_index,
190        ))
191    }
192}
193
#[cfg(test)]
mod test {
    use super::*;
    use std::ops::Range;

    /// Assert that `quoted_words(input)` yields exactly the `expected` (text, range) pairs, in
    /// order, and nothing afterwards.
    #[track_caller]
    fn assert_words(input: &str, expected: &[(&str, Range<usize>)]) {
        let actual: Vec<_> = quoted_words(input)
            .map(|word| (word.as_str(), word.range()))
            .collect();
        assert_eq!(expected, actual.as_slice());
    }

    /// Assert that `quoted_phrases(input)` yields exactly the `expected` (text, range) pairs, in
    /// order, and nothing afterwards.
    #[track_caller]
    fn assert_phrases(input: &str, expected: &[(&str, Range<usize>)]) {
        let actual: Vec<_> = quoted_phrases(input)
            .map(|phrase| (phrase.as_str(), phrase.range()))
            .collect();
        assert_eq!(expected, actual.as_slice());
    }

    #[test]
    fn quoted_word_iter_test() {
        assert_words(
            "a boy \n named \"Johnny Cash\"",
            &[
                ("a", 0..1),
                ("boy", 2..5),
                ("named", 8..13),
                ("Johnny Cash", 14..27),
            ],
        );
    }

    #[test]
    fn quoted_phrases_test() {
        assert_phrases(
            "  \"Medium\" Dave  ",
            &[("Medium", 2..10), ("\"Medium\" Dave", 2..15)],
        );
    }

    #[test]
    fn quoted_phrases_test_repeated() {
        let phrases: Vec<_> = quoted_phrases("badger badger badger")
            .map(|phrase| phrase.as_str())
            .collect();

        assert_eq!(
            vec!["badger", "badger badger", "badger badger badger"],
            phrases,
        );
    }

    #[test]
    fn quoted_word_iter_test_trailing_comma() {
        assert_words(
            "\"Legolas\", an elf",
            &[
                ("Legolas", 0..9),
                (",", 9..10),
                ("an", 11..13),
                ("elf", 14..17),
            ],
        );
    }

    #[test]
    fn quoted_word_iter_test_empty_quotes() {
        assert_words("\"\"", &[("", 0..2)]);
    }

    #[test]
    fn quoted_word_iter_test_empty_quotes_mid_word() {
        assert_words("  bl\"\"ah ", &[("bl", 2..4), ("", 4..6), ("ah", 6..8)]);
    }

    #[test]
    fn quoted_word_iter_test_unclosed_quote() {
        // The unclosed quote swallows everything up to the end, trailing space included.
        assert_words("  bl\"ah ", &[("bl", 2..4), ("ah ", 4..8)]);
    }

    #[test]
    fn quoted_word_iter_test_unclosed_quote_at_end() {
        assert_words(" \"", &[("", 1..2)]);
    }

    #[test]
    fn quoted_word_iter_test_trailing_quote() {
        assert_words("  bl\"", &[("bl", 2..4), ("", 4..5)]);
    }

    #[test]
    fn quoted_word_iter_test_single_letter() {
        // A single multi-byte character: the range is in bytes, not characters.
        assert_words("🥔", &[("🥔", 0..4)]);
    }

    #[test]
    fn quoted_word_iter_test_empty() {
        assert_words("", &[]);
        assert_words(" ", &[]);
    }
}