1use super::Substr;
2use std::str::CharIndices;
3
4pub fn quoted_words<'a, W>(phrase: W) -> impl Iterator<Item = Substr<'a>>
34where
35 W: Into<Substr<'a>>,
36{
37 QuotedWordIter::new(phrase.into())
38}
39
40pub fn quoted_phrases<'a, W>(phrase: W) -> impl Iterator<Item = Substr<'a>>
60where
61 W: Into<Substr<'a>>,
62{
63 let mut iter = quoted_words(phrase);
64 let first = iter.next();
65 let start = first.as_ref().map(|f| f.range().start).unwrap_or(0);
66
67 first.into_iter().chain(iter.map(move |substr| {
68 Substr::new(
69 substr.as_original_str(),
70 start..substr.range().end,
71 start..substr.range().end,
72 )
73 }))
74}
75
76pub struct QuotedWordIter<'a> {
77 phrase: Substr<'a>,
78 char_iter: CharIndices<'a>,
79 quote_len: Option<usize>,
80}
81
82impl<'a> QuotedWordIter<'a> {
83 fn new(phrase: Substr<'a>) -> Self {
84 Self {
85 char_iter: phrase.as_original_str().char_indices(),
86 phrase,
87 quote_len: None,
88 }
89 }
90}
91
92impl<'a> Iterator for QuotedWordIter<'a> {
93 type Item = Substr<'a>;
94
95 fn next(&mut self) -> Option<Self::Item> {
96 let (quote_char, first_index) = if let Some(quote_len) = self.quote_len {
97 self.quote_len = None;
98
99 if let Some((i, c)) = self.char_iter.next() {
100 if c == '"' {
101 return Some(Substr::new(
102 self.phrase.as_original_str(),
103 i..i,
104 i - quote_len..i + c.len_utf8(),
105 ));
106 } else {
107 (
108 self.phrase.as_original_str()[i - quote_len..i]
109 .chars()
110 .next(),
111 i,
112 )
113 }
114 } else {
115 let original_str = self.phrase.as_original_str();
116 return Some(Substr::new(
117 original_str,
118 original_str.len()..original_str.len(),
119 original_str.len() - quote_len..original_str.len(),
120 ));
121 }
122 } else {
123 let (first_index, first_char) = loop {
124 if let Some((i, c)) = self.char_iter.next() {
125 if !c.is_whitespace() {
126 break (i, c);
127 }
128 } else {
129 return None;
130 }
131 };
132
133 if first_char == '"' {
134 if let Some((i, c)) = self.char_iter.next() {
135 if c == '"' {
136 return Some(Substr::new(
138 self.phrase.as_original_str(),
139 i..i,
140 i - first_char.len_utf8()..i + c.len_utf8(),
141 ));
142 } else {
143 (Some(first_char), i)
144 }
145 } else {
146 let original_str = self.phrase.as_original_str();
147 return Some(Substr::new(
148 original_str,
149 original_str.len()..original_str.len(),
150 first_index..original_str.len(),
151 ));
152 }
153 } else {
154 (None, first_index)
155 }
156 };
157
158 let last_index = loop {
159 if let Some((i, c)) = self.char_iter.next() {
160 if let Some(quote_char) = quote_char {
161 if c == '"' {
162 return Some(Substr::new(
163 self.phrase.as_original_str(),
164 first_index..i,
165 first_index - quote_char.len_utf8()..i + c.len_utf8(),
166 ));
167 }
168 } else if c == '"' {
169 self.quote_len = Some(c.len_utf8());
170 break i;
171 } else if c.is_whitespace() {
172 break i;
173 }
174 } else if let Some(quote_char) = quote_char {
175 let original_str = self.phrase.as_original_str();
176 return Some(Substr::new(
177 original_str,
178 first_index..original_str.len(),
179 first_index - quote_char.len_utf8()..original_str.len(),
180 ));
181 } else {
182 break self.phrase.as_original_str().len();
183 }
184 };
185
186 Some(Substr::new(
187 self.phrase.as_original_str(),
188 first_index..last_index,
189 first_index..last_index,
190 ))
191 }
192}
193
#[cfg(test)]
mod test {
    use super::*;

    // All expected ranges below are byte offsets into the input literal, so the exact amount of
    // leading whitespace in each literal is significant.

    /// Collects `(as_str(), range())` for every item `quoted_words` yields for `input`.
    fn words(input: &str) -> Vec<(&str, ::std::ops::Range<usize>)> {
        quoted_words(input)
            .map(|word| (word.as_str(), word.range()))
            .collect()
    }

    /// Collects `(as_str(), range())` for every item `quoted_phrases` yields for `input`.
    fn phrases(input: &str) -> Vec<(&str, ::std::ops::Range<usize>)> {
        quoted_phrases(input)
            .map(|phrase| (phrase.as_str(), phrase.range()))
            .collect()
    }

    #[test]
    fn quoted_word_iter_test() {
        assert_eq!(
            vec![
                ("a", 0..1),
                ("boy", 2..5),
                ("named", 8..13),
                ("Johnny Cash", 14..27),
            ],
            words("a boy \n named \"Johnny Cash\""),
        );
    }

    #[test]
    fn quoted_phrases_test() {
        // Two leading spaces put the opening quote at byte 2.
        assert_eq!(
            vec![("Medium", 2..10), ("\"Medium\" Dave", 2..15)],
            phrases("  \"Medium\" Dave "),
        );
    }

    #[test]
    fn quoted_phrases_test_repeated() {
        assert_eq!(
            vec!["badger", "badger badger", "badger badger badger"],
            quoted_phrases("badger badger badger")
                .map(|w| w.as_str())
                .collect::<Vec<_>>(),
        );
    }

    #[test]
    fn quoted_word_iter_test_trailing_comma() {
        assert_eq!(
            vec![
                ("Legolas", 0..9),
                (",", 9..10),
                ("an", 11..13),
                ("elf", 14..17),
            ],
            words("\"Legolas\", an elf"),
        );
    }

    #[test]
    fn quoted_word_iter_test_empty_quotes() {
        assert_eq!(vec![("", 0..2)], words("\"\""));
    }

    #[test]
    fn quoted_word_iter_test_empty_quotes_mid_word() {
        // Two leading spaces: "bl" occupies bytes 2..4.
        assert_eq!(
            vec![("bl", 2..4), ("", 4..6), ("ah", 6..8)],
            words("  bl\"\"ah "),
        );
    }

    #[test]
    fn quoted_word_iter_test_unclosed_quote() {
        // Two leading spaces and one trailing space: the unclosed quote swallows the rest of
        // the input, trailing space included.
        assert_eq!(vec![("bl", 2..4), ("ah ", 4..8)], words("  bl\"ah "));
    }

    #[test]
    fn quoted_word_iter_test_unclosed_quote_at_end() {
        assert_eq!(vec![("", 1..2)], words(" \""));
    }

    #[test]
    fn quoted_word_iter_test_trailing_quote() {
        // Two leading spaces: "bl" occupies bytes 2..4 and the quote sits at byte 4.
        assert_eq!(vec![("bl", 2..4), ("", 4..5)], words("  bl\""));
    }

    #[test]
    fn quoted_word_iter_test_single_letter() {
        assert_eq!(vec![("🥔", 0..4)], words("🥔"));
    }

    #[test]
    fn quoted_word_iter_test_empty() {
        assert!(quoted_words("").next().is_none());
        assert!(quoted_words(" ").next().is_none());
    }
}