1 /* Extras module
2 
3    Provides some nice-to-have, often useful things.
4 
5    It publicly imports 'core', so there's no need to import it if you use
6    extras.
7 
8    */
9 
10 module parsed.extras;
11 
12 import std.range;
13 import std.traits;
14 
15 public import parsed.core;
16 
17 /* ---------- single-character parsers ---------- */
18 
/* Matches exactly one whitespace character, as classified by
   std.uni.isWhite.

   Params:
     acceptNewline = when false (the default) '\n' and '\r' are rejected even
                     though they are whitespace.

   Note: on Windows an end of line is two characters, so this parser takes
   only the first one of the pair and leaves the second in the input.
 */
auto
whitespace(B, C = char)(bool acceptNewline = false)
    if (isSomeChar!C)
{
    import std.uni : isWhite;

    return singleChar!(B, C)((C ch) {
        immutable lineBreak = (ch == '\n' || ch == '\r');
        return ch.isWhite && (acceptNewline || !lineBreak);
    });
}
unittest
{
    string withSpace = "foo bar";
    string withNewline = "foo\nbar";
    string joined = "foobar";

    /* Default behaviour: a newline is not acceptable whitespace. */
    auto strict = literal!string("foo")
        / whitespace!string;
    assert(strict.match(withSpace));
    assert(!strict.match(withNewline));
    assert(!strict.match(joined));

    /* With acceptNewline set, a newline counts as whitespace too. */
    auto lenient = literal!string("foo")
        / whitespace!string(true);
    assert(lenient.match(withSpace));
    assert(lenient.match(withNewline));
    assert(!lenient.match(joined));
}
55 
/* Matches exactly one character for which std.uni.isWhite is false. */
auto
nonwhite(B, C = char)()
    if (isSomeChar!C)
{
    import std.uni : isWhite;
    return singleChar!(B, C)((C ch) => !isWhite(ch));
}
unittest
{
    string text = "foo bar";
    auto start = ParserState!string(text);
    auto token = many(1, -1, nonwhite!string);

    /* The first run stops at the space... */
    auto first = token.run(start);
    assert(first.success);
    assert(first.parsed == "foo");

    /* ...and the second run starts on that space, so it fails. */
    auto second = token.run(first);
    assert(!second.success);
}
77 
/* Matches exactly one alphanumeric character, as classified by
   std.uni.isAlphaNum. */
auto
alnum(B, C = char)()
    if (isSomeChar!C)
{
    import std.uni : isAlphaNum;
    return singleChar!(B, C)((C ch) => isAlphaNum(ch));
}
unittest
{
    string text = "foo12";
    auto start = ParserState!string(text);

    auto run = many(1, -1, alnum!string);
    auto outcome = run.run(start);
    assert(outcome.success);
    assert(outcome.parsed == "foo12");
}
96 
/* Matches exactly one alphabetic character, as classified by
   std.uni.isAlpha. */
auto
alpha(B, C = char)()
    if (isSomeChar!C)
{
    import std.uni : isAlpha;
    return singleChar!(B, C)((C ch) => isAlpha(ch));
}
unittest
{
    string text = "foo12";
    auto start = ParserState!string(text);

    /* Parsing stops at the first digit. */
    auto letters = many(1, -1, alpha!string);
    auto outcome = letters.run(start);
    assert(outcome.success);
    assert(outcome.parsed == "foo");
}
115 
/* A decimal digit: exactly one of the ASCII characters '0' .. '9'.

   std.ascii.isDigit is used instead of std.uni.isNumber because the latter
   accepts every Unicode numeric character (superscripts, Roman numerals,
   fractions and the like), which are not decimal digits and would be let
   through for wchar/dchar input.
 */
auto
digit(B, C = char)()
    if (isSomeChar!C)
{
    import std.ascii : isDigit;
    return singleChar!(B, C)(c => c.isDigit);
}
unittest
{
    string str = "123f";
    auto state = ParserState!string(str);

    auto p = many(1, -1, digit!string);
    auto res = p.run(state);
    assert(res.success);
    assert(res.parsed == "123");
}
134 
/* A hexadecimal digit: one of '0' .. '9', 'a' .. 'f' or 'A' .. 'F'.

   std.ascii.isHexDigit is used instead of std.uni.isNumber plus manual
   letter ranges, because isNumber also accepts Unicode numeric characters
   that are not hexadecimal digits.
 */
auto
hexdigit(B, C = char)()
    if (isSomeChar!C)
{
    import std.ascii : isHexDigit;
    return singleChar!(B, C)(c => c.isHexDigit);
}
unittest
{
    string str = "1aFg";
    auto state = ParserState!string(str);

    /* 'g' is not a hexadecimal digit, so parsing stops in front of it. */
    auto p = many(1, -1, hexdigit!string);
    auto res = p.run(state);
    assert(res.success);
    assert(res.parsed == "1aF");
}
146 
/* Matches exactly one line-break character: either '\n' or '\r'. */
auto
newline(B, C = char)()
    if (isSomeChar!C)
{
    return singleChar!(B, C)((C ch) {
        switch (ch) {
            case '\n', '\r':
                return true;
            default:
                return false;
        }
    });
}
unittest
{
    string text = "foo\nbar";

    auto twoLines = literal!string("foo")
        * newline!string
        * literal!string("bar");
    assert(twoLines.match(text));
}
163 
164 /* ---------- multi-character combinations of the above ---------- */
165 
/* Parses a whole line (with or without its terminating newline). Always
   succeeds.

   Params:
     keepTerminator = whether the terminating newline stays in '.parsed'.
                      This affects '.parsed' only: the terminating newline is
                      always removed from the input.
 */
Parser!(B, immutable(C)[]) /* auto doesn't work here. */
line(B, C = char)(bool keepTerminator)
    if (isSomeChar!C)
{
    import std.string : chomp;
    alias Str = immutable(C)[];

    auto wholeLine = charUntil!(B, C)(ch => ch == '\n' || ch == '\r', true);
    if (!keepTerminator) {
        /* Strip the line ending from the parsed text. */
        return wholeLine / morph!(B, Str)(text => text.chomp);
    }
    return wholeLine;
}
unittest
{
    string text = "foo\nbar";
    auto start = ParserState!string(text);
    auto oneLine = line!string(false);

    auto first = oneLine.run(start);
    assert(first.success);
    assert(first.parsed == "foo");

    /* The last line has no terminator and is still parsed. */
    auto second = oneLine.run(first);
    assert(second.success);
    assert(second.parsed == "bar");
}
196 
/* Parses a run of whitespace characters that is at least one character
   long. Newlines are part of the run only if 'acceptNewlines' is set. */
auto
someWhite(B, C = char)(bool acceptNewlines = false)
    if (isSomeChar!C)
{
    auto oneBlank = whitespace!(B, C)(acceptNewlines);
    return many(1, -1, oneBlank);
}
unittest
{
    string spaces = "foo   bar";
    string tab = "foo\tbar";
    string nothing = "foobar";
    string newlines = "foo \n\nbar";

    auto sameLine = literal!int("foo") / someWhite!int(false) / literal!int("bar");
    auto anyLine = literal!int("foo") / someWhite!int(true) / literal!int("bar");

    /* Without newline support. */
    assert(sameLine.match(spaces));
    assert(sameLine.match(tab));
    assert(!sameLine.match(nothing));
    assert(!sameLine.match(newlines));

    /* With newline support. */
    assert(anyLine.match(spaces));
    assert(anyLine.match(tab));
    assert(!anyLine.match(nothing));
    assert(anyLine.match(newlines));
}
224 
/* Parses a possibly empty run of whitespace characters. Newlines are part of
   the run only if 'acceptNewlines' is set. */
auto
maybeWhite(B, C = char)(bool acceptNewlines = false)
    if (isSomeChar!C)
{
    auto oneBlank = whitespace!(B, C)(acceptNewlines);
    return many(0, -1, oneBlank);
}
unittest
{
    string spaces = "foo   bar";
    string tab = "foo\tbar";
    string nothing = "foobar";
    string newlines = "foo \n\nbar";

    auto sameLine = literal!int("foo") / maybeWhite!int(false) / literal!int("bar");
    auto anyLine = literal!int("foo") / maybeWhite!int(true) / literal!int("bar");

    /* Without newline support. */
    assert(sameLine.match(spaces));
    assert(sameLine.match(tab));
    assert(sameLine.match(nothing));
    assert(!sameLine.match(newlines));

    /* With newline support. */
    assert(anyLine.match(spaces));
    assert(anyLine.match(tab));
    assert(anyLine.match(nothing));
    assert(anyLine.match(newlines));
}
252 
/* Parses a run of newline characters ('\n' or '\r') that is at least one
   character long. */
auto 
someNewlines(B, C = char)()
    if (isSomeChar!C)
{
    auto oneBreak = newline!(B, C)();
    return many(1, -1, oneBreak);
}
unittest
{
    string text = "foo\n\rbar";
    auto separated = literal!int("foo")
        / someNewlines!int
        / literal!int("bar");
    assert (separated.match(text));
}
268 
/* Parses zero or more newline characters ('\n' or '\r'). */
auto 
maybeNewlines(B, C = char)()
    if (isSomeChar!C)
{
    return many(0, -1, newline!(B, C));
}
unittest
{
    string str1 = "foo\n\rbar";
    string str2 = "foobar";
    /* This test used to exercise 'someNewlines' by mistake. */
    auto p1 = literal!int("foo")
        / maybeNewlines!int
        / literal!int("bar");
    assert (p1.match(str1));
    /* Zero newlines must be accepted as well. */
    assert (p1.match(str2));
}
284 
/* Character class accepted by the 'word' parser. */
enum Word
{
    any,   /* anything that is not whitespace */
    alnum, /* alphanumeric characters */
    alpha  /* alphabetic characters */
}
/* Parses a sequence of characters whose length is within the given bounds.
   The characters are non-whitespace ones for Word.any, alphanumeric ones for
   Word.alnum and alphabetic ones for Word.alpha.

   Either bound can be negative, which means no limit on the corresponding
   end.
   */
auto
word(B, C = char)(Word type, int minLength = 1, int maxLength = -1)
    if (isSomeChar!C)
{
    final switch (type) {
        case Word.alpha:
            return many(minLength, maxLength, alpha!(B, C)());
        case Word.alnum:
            return many(minLength, maxLength, alnum!(B, C)());
        case Word.any:
            return many(minLength, maxLength, nonwhite!(B, C)());
    }
}
unittest
{
    string text = "foo12( bar1 ";
    auto start = ParserState!string(text);

    /* Word.any stops only at whitespace. */
    auto anyWord = word!string(Word.any);
    auto anyRes = anyWord.run(start);
    assert(anyRes.success);
    assert(anyRes.parsed == "foo12(");

    /* Word.alnum stops at the parenthesis. */
    auto alnumWord = word!string(Word.alnum);
    auto alnumRes = alnumWord.run(start);
    assert(alnumRes.success);
    assert(alnumRes.parsed == "foo12");

    /* Word.alpha stops at the first digit. */
    auto alphaWord = word!string(Word.alpha);
    auto alphaRes = alphaWord.run(start);
    assert(alphaRes.success);
    assert(alphaRes.parsed == "foo");
}
330 
/* Parses a number, that is a run of one or more characters accepted by the
   'digit' parser. */
auto
number(B, C = char)()
    if (isSomeChar!C)
{
    auto oneDigit = digit!(B, C)();
    return many(1, -1, oneDigit);
}
unittest
{
    string spaced = "12 12";
    string suffixed = "12f";
    string noDigits = "foo";
    auto spacedState = ParserState!string(spaced);
    auto suffixedState = ParserState!string(suffixed);

    auto num = number!string;

    /* Parsing stops at the space. */
    auto fromSpaced = num.run(spacedState);
    assert(fromSpaced.success);
    assert(fromSpaced.parsed == "12");

    /* Parsing stops at the letter. */
    auto fromSuffixed = num.run(suffixedState);
    assert(fromSuffixed.success);
    assert(fromSuffixed.parsed == "12");

    assert(!num.match(noDigits));
}
357 
/* Parses a hexadecimal number. The number may or may not be prefixed by '0x'.
   The prefix will *not* appear in '.parsed'. 

   The sub-parsers are instantiated with this parser's own B (and C), so the
   result combines with other Parser!(B, immutable(C)[]) parsers. Previously
   the string and character types were passed in place of B.
 */
auto
hexnum(B, C = char)()
    if (isSomeChar!C)
{
    alias S = immutable(C)[];
    /* Typed explicitly so that 'literal' picks up the right string type from
       its argument. */
    S prefix = "0x";
    return maybe(literal!B(prefix)) / many(1, -1, hexdigit!(B, C));
}
unittest
{
    auto p = hexnum!string;
    assert(p.match("0x1F"));
    assert(p.match("ff"));
    assert(!p.match("xyz"));
}
368 
369 /* ---------- misc ---------- */
370 
/* Makes the given parser optional: it is applied at most once, and not
   matching at all is fine too. */
auto
maybe(B, S = string)(Parser!(B, S) p)
    if (isSomeString!S)
{
    enum int atLeast = 0;
    enum int atMost = 1;
    return many(atLeast, atMost, p);
}
unittest
{
    string withSpace = "foo bar";
    string joined = "foobar";
    string withBang = "foo!bar";

    auto optionalGap = literal!string("foo")
        / maybe(whitespace!string)
        / literal!string("bar");

    assert(optionalGap.match(withSpace));
    assert(optionalGap.match(joined));
    assert(!optionalGap.match(withBang));
}
392 
/* Parses text between a balanced pair of the given characters.

   Params:
     left     = opening character; it must be the first character of the input.
     right    = closing character.
     keepPair = whether the outermost pair itself is included in '.parsed'.

   Fails if the input does not start with 'left' or if the pair is never
   closed. Everything up to and including the closing character is removed
   from the input.
 */
auto
balanced(B, C = char)(C left, C right, bool keepPair = false)
    if (isSomeChar!C)
{
    alias S = immutable(C)[];
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            if (!toParse.success) return toParse.fail;

            auto input = toParse.left;
            /* The shortest possible match is an opener directly followed by
               a closer, hence at least two characters are needed. */
            if (input.length < 2 || input[0] != left) return toParse.fail;

            int depth = 1;
            size_t consumed = 1;
            foreach (C ch; input[1 .. $]) {
                consumed++;
                /* 'right' is deliberately tested before 'left': when both
                   are the same character, its next occurrence closes the
                   pair instead of opening a nested one. */
                if (ch == right) {
                    if (--depth == 0) break;
                } else if (ch == left) {
                    depth++;
                }
            }
            /* Ran out of input before the pair was closed. */
            if (depth != 0) return toParse.fail;

            auto res = toParse;
            res.parsed = keepPair
                ? input[0 .. consumed]
                : input[1 .. consumed - 1];
            if (consumed < input.length)
                res.left = input[consumed .. $];
            else
                res.left = [];
            return res.succeed;
        } /* parse */
    } /* Res */
    return new Res();
}
unittest
{
    string flat = "(abcdef)";
    string nestedTail = "(ab(df))";
    string nestedHead = "((asdf)d)";
    string trailing = "(asdf)f";
    string unclosed = "(asdff";
    string slashes = "/asdf/";
    auto flatState = ParserState!int(flat);
    auto nestedTailState = ParserState!int(nestedTail);
    auto nestedHeadState = ParserState!int(nestedHead);
    auto trailingState = ParserState!int(trailing);
    auto unclosedState = ParserState!int(unclosed);
    auto slashesState = ParserState!int(slashes);
    auto parens = balanced!int('(', ')');
    auto sameChar = balanced!int('/', '/');

    auto flatRes = parens.run(flatState);
    assert(flatRes.success);
    assert(flatRes.parsed == "abcdef");

    auto nestedTailRes = parens.run(nestedTailState);
    assert(nestedTailRes.success);
    assert(nestedTailRes.parsed == "ab(df)");

    auto nestedHeadRes = parens.run(nestedHeadState);
    assert(nestedHeadRes.success);
    assert(nestedHeadRes.parsed == "(asdf)d");

    /* Text after the closing character is not part of the match. */
    auto trailingRes = parens.run(trailingState);
    assert(trailingRes.success);
    assert(trailingRes.parsed == "asdf");

    auto unclosedRes = parens.run(unclosedState);
    assert(!unclosedRes.success);

    /* The same character may serve as both opener and closer. */
    auto slashesRes = sameChar.run(slashesState);
    assert(slashesRes.success);
    assert(slashesRes.parsed == "asdf");
}
487 
/* Parses text between a balanced pair of bits that match the given parsers.
   'left' and 'right' parsers are going to be run many times, so be careful
   with building inside them.

   Params:
     left     = parser for the opening bit; it must succeed at the very start
                of the input.
     right    = parser for the closing bit.
     keepPair = whether the outermost opening and closing bits are included
                in '.parsed'.

   Fails if 'left' does not match at the start or if the pair is never
   closed.
 */
auto
balanced(B, S = string)(Parser!(B, S) left, Parser!(B, S) right, bool keepPair = false)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            if (!toParse.success) return toParse.fail;

            auto cur = toParse;
            cur = left.run(cur);
            if (!cur.success) return toParse.fail;

            int level = 1;
            /* Length of the opening bit; the inner text starts right after
               it. */
            size_t start = cur.parsed.length;
            /* Number of characters of the original input covered so far. */
            size_t parsed = start;
            /* Length of the most recent closing bit, needed to cut it off
               when the pair is not kept. */
            size_t lastRightLen;
            size_t len = toParse.left.length;
            /* NOTE(review): if 'left' can succeed without consuming input,
               'level' keeps growing while the scan does not advance --
               confirm that callers never pass such a parser. */
            while (level != 0 && parsed < len) {
                /* Note the order. It allows using same parsers for left and
                   right.
                   */
                auto maybeRight = right.run(cur);
                if (maybeRight.success) {
                    level--;
                    size_t rightLen = maybeRight.parsed.length;
                    parsed += rightLen;
                    lastRightLen = rightLen;
                    cur = maybeRight;
                    continue;
                }
                auto maybeLeft = left.run(cur);
                if (maybeLeft.success) {
                    level++;
                    parsed += maybeLeft.parsed.length;
                    cur = maybeLeft;
                    continue;
                }
                /* Neither delimiter starts here: skip one character. */
                parsed++;
                cur.left = cur.left[1 .. $];
            } /* while level != 0 */
            if (level != 0) return toParse.fail;
            auto res = toParse;
            res.left = cur.left;
            if (keepPair)
                res.parsed = toParse.left[0 .. parsed];
            else
                res.parsed = toParse.left[start .. parsed - lastRightLen];
            return res.succeed;
        } /* parse */
    } /* Res */
    return new Res();
}
unittest
{
    string str1 = "foo 1 2 3 bar";
    string str2 = "foo 1 2 3";
    string str3 = "foo 1 2 3 foo";
    auto s1 = ParserState!int(str1);
    auto s3 = ParserState!int(str3);
    auto p = balanced!int(literal!int("foo"), literal!int("bar"), false);
    auto p2 = balanced!int(literal!int("foo"), literal!int("foo"), false);

    auto res1 = p.run(s1);
    assert(res1.success);
    assert(res1.parsed == " 1 2 3 ");

    /* An unclosed pair is a failure. */
    assert(!p.match(str2));

    /* The same parser may serve as both opener and closer. */
    auto res3 = p2.run(s3);
    assert(res3.success);
    assert(res3.parsed == " 1 2 3 ");
}
567 
/* Parses text until a given parser succeeds. Fails if nothing matches the
   parser.

   Params:
     parser            = parser that recognizes the terminator.
     keepTerminator    = whether the text matched by 'parser' is appended to
                         '.parsed'.
     consumeTerminator = whether the text matched by 'parser' is removed from
                         the input (the default) or left there for the next
                         parser.

   The two flags are independent of each other.
 */
auto
upTo(B, S = string)(
        Parser!(B, S) parser, 
        bool keepTerminator = false,
        bool consumeTerminator = true)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            if (!toParse.success) return toParse.fail;

            auto cur = toParse;
            /* Number of characters skipped in front of the terminator. */
            size_t skipped = 0;
            while (cur.left.length > 0) {
                auto maybeDone = parser.run(cur);
                if (maybeDone.success) {
                    size_t finish = skipped;
                    if (keepTerminator) finish += maybeDone.parsed.length;
                    /* 'maybeDone' is the state after the terminator, 'cur'
                       is the state right in front of it. */
                    auto res = consumeTerminator ? maybeDone : cur;
                    return res.succeed(toParse.left[0 .. finish]);
                }
                skipped++;
                cur.left = cur.left[1 .. $];
            }
            return toParse.fail;
        }
    }
    return new Res();
}
unittest
{
    string str1 = "foo bar! baz";
    string str2 = "foo bar";
    auto s1 = ParserState!int(str1);
    auto s2 = ParserState!int(str2);
    auto p = upTo(literal!int("!"));

    auto res1 = p.run(s1);
    assert(res1.success);
    assert(res1.parsed == "foo bar");
    assert(res1.left == " baz");

    auto res2 = p.run(s2);
    assert(!res2.success);

    /* The terminator is kept in '.parsed' and removed from the input. */
    auto pKeep = upTo(literal!int("!"), true);
    auto res3 = pKeep.run(s1);
    assert(res3.success);
    assert(res3.parsed == "foo bar!");
    assert(res3.left == " baz");

    /* The terminator is left in the input. */
    auto pPeek = upTo(literal!int("!"), false, false);
    auto res4 = pPeek.run(s1);
    assert(res4.success);
    assert(res4.parsed == "foo bar");
    assert(res4.left == "! baz");
}
617 
/* Behaves just like the 'literal' parser from core, but takes a range of 
   strings as patterns. The patterns are tried in the order given and the
   first one the input starts with wins.

   Params:
     range         = patterns to look for. They are copied into an array
                     once, so a single-pass range is fine and the resulting
                     parser can be run any number of times.
     consumeInput  = whether the matched text is removed from the input.
     caseSensitive = when false, both the patterns and the input are
                     lower-cased before comparison. '.parsed' then holds the
                     lower-cased pattern.
 */
auto
multiliteral(B, S = string, R)(R range, bool consumeInput = true, bool caseSensitive = true)
    if (isSomeString!S && isInputRange!R && is(Unqual!(ElementType!R): S))
{
    import std.algorithm;
    import std.array : array;
    import std.string;

    /* Materialized eagerly: a lazy 'map' over an input range would be
       exhausted by the first parse. */
    auto literals = range.map!(x => caseSensitive ? x : x.toLower).array;
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            if (!toParse.success) return toParse.fail;
            /* The same for every pattern, so computed only once. */
            S checkAgainst = caseSensitive ? toParse.left : toParse.left.toLower;
            foreach (pattern; literals) {
                if (checkAgainst.startsWith(pattern)) {
                    if (consumeInput)
                        toParse.left = toParse.left[pattern.length .. $];
                    return toParse.succeed(pattern);
                }
            }
            return toParse.fail;
        }
    }
    return new Res();
}
unittest
{
    import std.algorithm;
    import std.range : inputRangeObject;
    import std.string;

    string str1 = "foo";
    string str2 = "bar";
    string str3 = "BAR";

    auto s1 = ParserState!int(str1);
    auto s2 = ParserState!int(str2);
    auto s3 = ParserState!int(str3);
    
    auto p1 = multiliteral!int(["foo", "bar"]);
    auto p2 = multiliteral!int(["foo", "bar"].map!(x => x.toUpper));

    auto res1_1 = p1.run(s1);
    auto res1_2 = p1.run(s2);
    auto res1_3 = p1.run(s3);

    assert(res1_1.success);
    assert(res1_1.parsed == "foo");

    assert(res1_2.success);
    assert(res1_2.parsed == "bar");

    assert(!res1_3.success);

    auto res2_1 = p2.run(s1);
    auto res2_2 = p2.run(s2);
    auto res2_3 = p2.run(s3);

    assert(!res2_1.success);
    assert(!res2_2.success);
    assert(res2_3.success);
    assert(res2_3.parsed == "BAR");

    /* A range with reference semantics must not be used up by the first
       parse. */
    auto p3 = multiliteral!int(inputRangeObject(["foo", "bar"]));
    assert(p3.run(s2).success);
    assert(p3.run(s2).success);

    /* Case-insensitive matching reports the lower-cased pattern. */
    auto p4 = multiliteral!int(["Foo", "bar"], true, false);
    auto res4 = p4.run(s3);
    assert(res4.success);
    assert(res4.parsed == "bar");
}
683 
/* Succeeds, with an empty '.parsed', only if there is no input left. */
auto
endOfInput(B, S = string)()
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* Anything left over means this is not the end. */
            if (toParse.left.length != 0)
                return toParse.fail;
            return toParse.succeed("");
        }
    }
    return new Res();
}
unittest
{
    string longer = "foobar";
    string exact = "foo";

    auto longerState = ParserState!int(longer);
    auto exactState = ParserState!int(exact);

    auto fooOnly = literal!int("foo") 
        * endOfInput!int;

    /* "bar" is left over, so the end of input is not reached. */
    auto fromLonger = fooOnly.run(longerState);
    assert(!fromLonger.success);

    auto fromExact = fooOnly.run(exactState);
    assert(fromExact.success);
    assert(fromExact.parsed == "foo");
}