/* Core module

   Provides base Parser interface and some basic parser generators.

   If you need to write a parser yourself, derive from Parser and override its
   'parse' method. The 'run' wrapper is already short-circuiting: it returns
   'toParse.fail' without calling 'parse' when the incoming state has failed
   (unless the parser is oblivious, see below). Only include
   'if (!toParse.success) return toParse.fail;' yourself if you override 'run'
   or call 'parse' directly.

   Parsers can be made 'oblivious' by using makeOblivious method. An oblivious
   parser doesn't care if the previous chain has failed, it'll try to parse
   anyway. Oblivious parsers can fail or succeed as usual. This provides an
   ability to: one, recover from parser errors (kind of like | operator does),
   and, two, perform some operations if the chain has failed. For an example of
   the latter see the 'throwOnFailure' parser.

   I also recommend making an alias with type of your built-up value and using
   that instead of having the type everywhere. This way changing it later to
   something more appropriate will be a lot easier. Also, less typing if you
   choose a short alias.

   */

module parsed.core;

import std.traits;

/* ---------- base ---------- */

/* A snapshot of a parse in progress. Every manipulation method returns a
   modified copy and leaves the state it was called on untouched. */
struct ParserState(B, S = string) /* B(uild) and S(tring). */
    if (isSomeString!S)
{
    private alias ThisState = ParserState!(B, S);

    S left;                 /* Input that has not been consumed yet. */
    S parsed;               /* Text produced by the most recent parser. */
    B value;                /* Value built up so far. */
    bool success = true;    /* False once the chain has failed. */
    bool recovered = false; /* Set by 'recover', cleared by 'succeed'/'pass'. */

    private struct Slice
    {
        size_t start, end;
    }

    this(B init, S toParse)
    {
        value = init;
        left = toParse;
    }

    this(S toParse)
    {
        value = B.init;
        left = toParse;
    }

    /* ---------- slicing overloads ---------- */

    /* 'state[a .. b]' yields a copy whose 'left' is 'left[a .. b]'; '$' is
       the length of 'left'. */

    Slice opSlice(size_t Dim)(size_t start, size_t end)
    {
        return Slice(start, end);
    }

    size_t opDollar(size_t Dim)()
    {
        return left.length;
    }

    ThisState opIndex(Slice slice)
    {
        auto res = this;
        res.left = left[slice.start .. slice.end];
        return res;
    }

    /* ---------- manipulation ---------- */

    /* A failed copy with 'parsed' emptied. */
    ThisState fail()
    {
        auto res = this;
        res.success = false;
        res.parsed = "";
        return res;
    }

    /* A successful copy that keeps the current 'parsed'. */
    ThisState succeed()
    {
        auto res = this;
        res.success = true;
        res.recovered = false;
        return res;
    }

    /* A successful copy with 'parsed' replaced by 'withParsed'. */
    ThisState succeed(S withParsed)
    {
        auto res = this.succeed();
        res.parsed = withParsed;
        return res;
    }

    /* If the state is successful, replaces the value with
       'dg(value, parsed)'; otherwise returns a failed copy. */
    ThisState build(B delegate (B, const S) dg)
    {
        if (!success)
            return this.fail();
        auto res = this;
        res.value = dg(value, parsed);
        return res.succeed();
    }

    /* If the state is successful, merges in the outcome of a subparser that
       builds a different type: the value becomes
       'dg(value, other.value, other.parsed)' while 'left' and 'parsed' are
       taken from 'other'. Otherwise returns a failed copy. */
    ThisState absorb(B2)(
            ParserState!(B2, S) other,
            B delegate (B, B2, const S) dg)
    {
        if (!success)
            return this.fail();
        auto res = this;
        res.value = dg(value, other.value, other.parsed);
        res.left = other.left;
        res.parsed = other.parsed;
        return res.succeed();
    }

    /* A successful copy with 'parsed' reset to null. */
    ThisState pass()
    {
        auto res = this;
        res.success = true;
        res.recovered = false;
        res.parsed = null;
        return res;
    }

    /* A successful copy flagged as recovered from a failure. */
    ThisState recover()
    {
        auto res = this;
        res.recovered = true;
        res.success = true;
        return res;
    }
}
unittest
{
    string str1 = "foo bar";

    auto s1 = ParserState!int(str1);

    auto slice1_1 = s1[0 .. 3];
    auto slice1_2 = s1[3 .. $];
    assert(slice1_1.left == "foo");
    assert(slice1_2.left == " bar");
}

/* B(uild) and S(tring). */
class Parser(B, S = string)
    if (isSomeString!S)
{
    private:

    alias State = ParserState!(B, S);
    alias ThisParser = Parser!(B, S);
    alias Group = ParserGroup!(B, S);

    /* This should be true if the parser is able to operate even if the chain
       is in the failed state. */
    protected bool oblivious_ = false;
    protected LookaheadMode lookahead = LookaheadMode.none;

    /* Shared implementation of makeReluctant and makeGreedy: wraps this
       parser in a pass-through parser tagged with the given lookahead mode
       and puts it into a single-element group. */
    Group withLookahead(LookaheadMode mode)
    {
        class Res: ThisParser
        {
            this()
            {
                lookahead = mode;
                oblivious_ = this.outer.oblivious_;
            }

            override State parse(State toParse)
            {
                return this.outer.parse(toParse);
            }
        }
        return new Group(new Res, false);
    }

    public:

    bool oblivious() const @property { return oblivious_; }

    /* ---------- high-level operations ---------- */

    /* A wrapper over 'parse' that performs some operations: a failed state
       is passed through untouched unless the parser is oblivious, in which
       case the state is marked as recovered before parsing. */
    State run(State toParse)
    {
        if (!toParse.success) {
            if (!oblivious)
                return toParse.fail;
            toParse = toParse.recover;
        }
        return parse(toParse);
    }

    /* Runs the parser on the given text. */
    final State run(S text)
    {
        return run(State(B.init, text));
    }

    /* Returns true if the parser succeeds on the text. */
    final bool match(S text)
    {
        return run(text).success;
    }

    /* Main method. */
    protected abstract State parse(State toParse);

    /* ---------- parser combinations ---------- */

    /* Builds up a value: the new parser runs this one and, on success,
       replaces the value with 'dg(value, parsed)'. */
    ThisParser build(B delegate (B, const S) dg)
    {
        class Res: ThisParser
        {
            this()
            {
                oblivious_ = this.outer.oblivious;
                lookahead = this.outer.lookahead;
            }

            override State parse(State toParse)
            {
                auto res = this.outer.parse(toParse);
                if (!res.success)
                    return res.fail;
                res.value = dg(res.value, res.parsed);
                return res.succeed;
            } /* parse */
        } /* Res */
        return new Res();
    } /* build */

    /* Sequences this parser with another one. 'prepend' puts 'other' in
       front of this parser; 'concat' tells whether the second parser's
       'parsed' is concatenated to the first one's. */
    Group chain(ThisParser other, bool concat, bool prepend)
    {
        auto res = new Group(GroupType.and, false);
        if (prepend)
            res.parsers = [other, this];
        else
            res.parsers = [this, other];
        res.concat = [false, concat];
        return res;
    }

    /* An overload for Groups. The work is delegated to the group, so the
       direction has to be flipped: placing 'other' after this parser is the
       same as placing this parser in front of 'other'. */
    Group chain(Group other, bool concat, bool prepend)
    {
        return other.chain(this, concat, !prepend);
    }

    /* Returns state of the first parser of the two to succeed. 'prepend'
       makes 'other' the first alternative to be tried. */
    Group any(ThisParser other, bool prepend)
    {
        auto res = new Group(GroupType.or, false);
        if (prepend)
            res.parsers = [other, this];
        else
            res.parsers = [this, other];
        return res;
    }

    /* An overload for Groups. As with 'chain', the direction is flipped
       because the group does the work. */
    Group any(Group other, bool prepend)
    {
        return other.any(this, !prepend);
    }

    /* Make a new parser that discards original parser's 'parsed' and sets it
       to an empty string */
    ThisParser discard()
    {
        class Res: ThisParser
        {
            this()
            {
                oblivious_ = this.outer.oblivious;
                lookahead = this.outer.lookahead;
            }

            override State parse(State toParse)
            {
                auto res = this.outer.parse(toParse);
                if (!res.success)
                    return toParse.fail;
                res.parsed = "";
                return res.succeed;
            } /* parse */
        } /* Res */
        return new Res();
    } /* discard */

    /* Make a new parser that does the same as this one but also runs when
       the chain has already failed. */
    ThisParser makeOblivious()
    {
        class Res: ThisParser
        {
            this()
            {
                oblivious_ = true;
                lookahead = this.outer.lookahead;
            }

            override State parse(State toParse)
            {
                return this.outer.parse(toParse);
            }
        }
        return new Res();
    }

    /* Need a group because that's where lookahead driver is. */
    Group makeReluctant()
    {
        return withLookahead(LookaheadMode.reluctant);
    }

    /* Need a group because that's where lookahead driver is. */
    Group makeGreedy()
    {
        return withLookahead(LookaheadMode.greedy);
    }

    /* ---------- operator overloads ---------- */

    /* Infix analog of 'chain' without parsed string concatenation. Think of
       '/' as a wall where flow stops. */
    Group opBinary(string op)(ThisParser other)
        if (op == "/")
    {
        return chain(other, false, false);
    }

    /* Infix analog of 'chain' with parsed string concatenation. Think of '*'
       as of a piece of chain. */
    Group opBinary(string op)(ThisParser other)
        if (op == "*")
    {
        return chain(other, true, false);
    }

    /* Infix analog of 'build'. I've got no clever analogy as to why it's '%'.
       The real reason is that '%' is in the same precedence group as '*' and
       '/'. */
    ThisParser opBinary(string op)(B delegate (B, const S) dg)
        if (op == "%")
    {
        return build(dg);
    }

    /* Infix analog of 'any' */
    Group opBinary(string op)(ThisParser other)
        if (op == "|")
    {
        return any(other, false);
    }
}
unittest
{
    /* Building test. */

    import std.conv;

    string str2 = "1 2 3";
    auto state = ParserState!int(0, str2);

    auto p = (literal!int("1") | literal!int("2") | literal!int("3"))
        % (int i, string s) => i + to!int(s);
    auto space = literal!int(" ");
    auto sum = p * space * p * space * p;
    auto res = sum.run(state);
    assert(res.success);
    assert(res.parsed == "1 2 3");
    assert(res.value == 6);
}
unittest
{
    /* Discard and chaining test. 
*/

    string str1 = "foo bar";
    auto s1 = ParserState!int(str1);

    auto p1 = literal!int("foo")
        * literal!int(" ").discard
        * literal!int("bar");

    auto res1 = p1.run(str1);
    assert(res1.success);
    assert(res1.parsed == "foobar");
}

/* A group of parsers that is itself a parser: either a sequence (GroupType.and)
   or a choice (GroupType.or). This is also where lookahead is driven.

   B(uild) and S(tring). */
private class ParserGroup(B, S = string): Parser!(B, S)
    if (isSomeString!S)
{
    /* All members are private unless stated otherwise. */
    private:

    Parser!(B, S)[] parsers; /* Members, in the order they are applied/tried. */
    bool[] concat;           /* For sequences: concat[k] tells whether
                                parsers[k] appends its 'parsed' to what was
                                parsed before it. */
    GroupType type;
    bool monolithic;         /* A monolithic group is never unwrapped when it
                                is combined with other parsers. */

    /* A group is oblivious if any of its members is. */
    public override bool oblivious() const @property
    {
        import std.algorithm;
        return parsers.any!(x => x.oblivious);
    }

    alias Group = ParserGroup!(B, S);
    alias ThisParser = Parser!(B, S);
    alias State = ParserState!(B, S);

    /* An empty group of the given type. */
    this(GroupType type, bool monolithic)
    {
        this.type = type;
        this.monolithic = monolithic;
    }

    /* A copy of another group with its own 'monolithic' flag. */
    this(Group original, bool monolithic)
    {
        parsers = original.parsers.dup;
        concat = original.concat.dup;
        type = original.type;
        this.monolithic = monolithic;
    }

    /* A single-element sequence that inherits the lookahead mode of its only
       member. */
    this(ThisParser parser, bool monolithic)
    {
        this(GroupType.and, monolithic);
        lookahead = parser.lookahead;
        parsers = [parser];
        concat = [false];
    }

    /* Obliviousness handling is a bit different in groups: a failed state is
       handed to the members as is (without being marked as recovered), so
       that each member decides for itself what to do with it. */
    public override State run(State toParse)
    {
        if (toParse.success || oblivious) {
            return parse(toParse);
        } else {
            return toParse.fail;
        }
    }

    public alias run = ThisParser.run; /* Allows other overloads of run. */

    protected override State parse(State toParse)
    {
        if (type == GroupType.and) {
            /* Sequential application of parsers. */
            return tryRun(toParse, 0, parsers);
        } else {
            /* Alternative application of parsers: every alternative starts
               from the same state, the first one to succeed wins. */
            State save = toParse;
            foreach (current; parsers) {
                auto maybeRes = current.run(save);
                if (maybeRes.success) return maybeRes.succeed;
            }
            return toParse.fail;
        } /* if type == GroupType.and */
    } /* parse */

    /* A helper function to deal with in-chain lookahead. Applies
       'leftParsers' in order starting from 'tryParse'; 'i' is the index of
       leftParsers[0] in the whole chain (used to look up its concat flag). */
    State tryRun(State tryParse,
            size_t i,
            ThisParser[] leftParsers)
    {
        if (leftParsers.length == 0) {
            if (tryParse.success)
                return tryParse.succeed;
            else
                return tryParse.fail;
        }

        /* A helper function to avoid excessive typing. */
        void prepend(ref State to, State prep) {
            if (concat[i] && to.success)
                to.parsed = prep.parsed ~ to.parsed;
        }

        ThisParser current = leftParsers[0];
        ThisParser[] rest = leftParsers[1 .. $];

        /* Lets 'current' see only the first 'end' characters of the input,
           then runs the rest of the chain on whatever remains. Returns true
           (with the final state in 'outcome') only if the whole remaining
           chain has succeeded. */
        bool attempt(size_t end, out State outcome) {
            State newState = current.run(tryParse[0 .. end]);
            if (!newState.success)
                return false;
            /* Give back the part of the input that was hidden. */
            newState.left ~= tryParse.left[end .. $];
            prepend(newState, tryParse);
            outcome = tryRun(newState, i + 1, rest);
            return outcome.success;
        }

        final switch (current.lookahead) {
            case LookaheadMode.none:
                auto newState = current.run(tryParse);
                prepend(newState, tryParse);
                return tryRun(newState, i + 1, rest);
            case LookaheadMode.reluctant:
                /* Shortest prefix first, the empty one included. */
                foreach (end; 0 .. tryParse.left.length + 1) {
                    State outcome;
                    if (attempt(end, outcome))
                        return outcome.succeed;
                }
                return tryParse.fail;
            case LookaheadMode.greedy:
                /* Longest prefix first. The empty prefix is tried as well,
                   exactly as in the reluctant case; otherwise a greedy
                   parser could never give back all of its input. */
                foreach_reverse (end; 0 .. tryParse.left.length + 1) {
                    State outcome;
                    if (attempt(end, outcome))
                        return outcome.succeed;
                }
                return tryParse.fail;
        } /* switch current.lookahead */
    } /* tryRun */

    public final Group makeMonolithic()
    {
        return new Group(this, true);
    }

    /* Either append or prepend a parser to the chain. */
    public override Group chain(ThisParser other, bool concat, bool prepend)
    {
        /* We simply wrap the group and the other parser in a new group in two
           cases: when the group is monolithic, or when we can't chain extra
           parsers to the group (because it's not an actual chain, it's a
           choice construct). The wrapper is always a sequence, whatever the
           type of this group is. */
        if (monolithic || type == GroupType.or) {
            auto res = new Group(GroupType.and, false);
            if (prepend)
                res.parsers = [other, this];
            else
                res.parsers = [this, other];
            res.concat = [false, concat];
            return res;
        } else {
            auto res = new Group(this, false);
            if (prepend) {
                res.parsers = other ~ parsers;
                res.concat = false ~ this.concat;
                /* The flag belongs to the junction, i.e. to the parser that
                   now follows 'other'. */
                if (res.concat.length > 1)
                    res.concat[1] = concat;
            } else {
                res.parsers = parsers ~ other;
                res.concat = this.concat ~ concat;
            }
            return res;
        } /* if monolithic */
    } /* chain */

    /* Same, but add a chain instead of an individual parser. */
    public override Group chain(Group other, bool concat, bool prepend)
    {
        auto res = new Group(GroupType.and, false);
        /* We treat OR groups as monolithic, because we can't add elements to
           them in this method. */
        if ((monolithic && other.monolithic)
                || type == GroupType.or
                || other.type == GroupType.or) {
            /* Produce a simple Group that uses both of these without
               unwrapping them. */
            if (prepend)
                res.parsers = [other, this];
            else
                res.parsers = [this, other];
            res.concat = [false, concat];
        } else if (monolithic && !other.monolithic) {
            if (prepend) {
                res.parsers = other.parsers ~ this;
                res.concat = other.concat ~ concat;
            } else {
                res.parsers = this ~ other.parsers;
                res.concat = false ~ other.concat;
                /* The junction flag goes to the first parser of 'other'. */
                if (res.concat.length > 1)
                    res.concat[1] = concat;
            }
        } else if (!monolithic && other.monolithic) {
            if (prepend) {
                res.parsers = other ~ parsers;
                res.concat = false ~ this.concat;
                if (res.concat.length > 1)
                    res.concat[1] = concat;
            } else {
                res.parsers = parsers ~ other;
                res.concat = this.concat ~ concat;
            }
        } else {
            /* Both groups are unwrapped; 'middle' is the index of the first
               parser of the second half, which is where the junction flag
               goes. */
            size_t middle;
            if (prepend) {
                middle = other.parsers.length;
                res.parsers = other.parsers ~ parsers;
                res.concat = other.concat ~ this.concat;
            } else {
                middle = parsers.length;
                res.parsers = parsers ~ other.parsers;
                res.concat = this.concat ~ other.concat;
            }
            if (middle < res.concat.length)
                res.concat[middle] = concat;
        } /* if monolithic combination */
        return res;
    } /* chain */

    public override Group any(ThisParser other, bool prepend)
    {
        /* We treat AND groups as monolithic because we can't add elements to
           them in this method. */
        auto res = new Group(GroupType.or, false);
        if (monolithic || type == GroupType.and) {
            if (prepend)
                res.parsers = [other, this];
            else
                res.parsers = [this, other];
        } else {
            if (prepend)
                res.parsers = other ~ parsers;
            else
                res.parsers = parsers ~ other;
        }
        return res;
    }

    public override Group any(Group other, bool prepend)
    {
        /* We treat AND groups as monolithic because we can't add elements to
           them in this method. This applies to either of the two groups. */
        auto res = new Group(GroupType.or, false);
        if ((monolithic && other.monolithic)
                || type == GroupType.and
                || other.type == GroupType.and) {
            if (prepend)
                res.parsers = [other, this];
            else
                res.parsers = [this, other];
        } else if (monolithic && !other.monolithic) {
            if (prepend)
                res.parsers = other.parsers ~ this;
            else
                res.parsers = this ~ other.parsers;
        } else if (!monolithic && other.monolithic) {
            if (prepend)
                res.parsers = other ~ parsers;
            else
                res.parsers = parsers ~ other;
        } else {
            if (prepend)
                res.parsers = other.parsers ~ parsers;
            else
                res.parsers = parsers ~ other.parsers;
        } /* if monolithic combination */
        return res;
    } /* any */
}
unittest
{
    /* Greed and reluctance test. */

    string str1 = "foo!bar!";
    auto s1 = ParserState!int(str1);

    auto p1 = everything!int.makeReluctant * literal!int("!");
    auto p2 = everything!int.makeGreedy * literal!int("!");

    auto res1_1 = p1.run(s1);
    assert(res1_1.success);
    assert(res1_1.parsed == "foo!");

    auto res2_1 = p2.run(s1);
    assert(res2_1.success);
    assert(res2_1.parsed == "foo!bar!");
}
unittest
{
    /* One more greed and reluctance test; this time the greedy/reluctant
       parser is the sole parser in the chain. */

    string str1 = "111";
    auto s1 = ParserState!int(str1);

    auto p1 = many(1, -1, literal!int("1")).makeReluctant;
    auto p2 = many(1, -1, literal!int("1")).makeGreedy;

    auto res1_1 = p1.run(s1);
    assert(res1_1.success);
    assert(res1_1.parsed == "1");

    auto res2_1 = p2.run(s1);
    assert(res2_1.success);
    assert(res2_1.parsed == "111");
}
unittest
{
    /* Chaining onto a choice must give a sequence, and a greedy parser must
       be able to give back all of its input. */

    auto choice = literal!int("a") | literal!int("b");
    auto seq = choice * literal!int("c");
    assert(seq.match("ac"));
    assert(seq.match("bc"));
    assert(!seq.match("c"));

    auto greedy = everything!int.makeGreedy * literal!int("!");
    assert(greedy.match("!"));
}
unittest
{
    /* Obliviousness test. 
*/

    string input = "foobar";
    auto start = ParserState!int(input);

    auto parser = literal!int("BAZ")
        / literal!int("foo").makeOblivious
        * literal!int("bar");

    auto outcome = parser.run(start);
    assert(outcome.success);
    assert(outcome.parsed == "foobar");
}
unittest
{
    /* Obliviousness and lookahead test. */

    string input = "12121";
    auto start = ParserState!int(input);

    auto reluctant = literal!int("0")
        / everything!int.makeOblivious.makeReluctant
        * literal!int("1");
    auto greedy = literal!int("0")
        / everything!int.makeOblivious.makeGreedy
        * literal!int("1");

    auto reluctantRes = reluctant.run(start);
    assert(reluctantRes.success);
    assert(reluctantRes.parsed == "1");

    auto greedyRes = greedy.run(start);
    assert(greedyRes.success);
    assert(greedyRes.parsed == "12121");
}

/* How a parser inside a chain picks the amount of input it takes. */
private enum LookaheadMode
{
    none,
    greedy,
    reluctant,
}

/* Whether a group is a sequence ('and') or a choice ('or'). */
private enum GroupType
{
    and,
    or
}

/* ---------- fundamental parsers ---------- */

/* Parses a literal string (case-sensitive by default). With 'consumeInput'
   off the literal is only looked at and stays in the input. On success
   'parsed' is set to 'str' as it was given. */
auto
literal(B, S = string)(const S str, bool consumeInput = true, bool caseSensitive = true)
    if (isSomeString!S)
{
    import std.string;

    /* What the input is actually compared with. */
    S needle = str;
    if (!caseSensitive)
        needle = needle.toLower;

    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            S haystack = caseSensitive ? toParse.left : toParse.left.toLower;
            if (!haystack.startsWith(needle))
                return toParse.fail;

            if (consumeInput)
                toParse.left = toParse.left[needle.length .. $];
            return toParse.succeed(str);
        }
    }
    return new Res();
}
unittest
{
    string greeting = "Hello world";

    auto exact = literal!int("Hello");
    assert(exact.match(greeting));

    auto anyCase = literal!int("hello", true, false);
    assert(anyCase.match(greeting));

    assert(!(exact * anyCase).match(greeting));

    auto whole = literal!int("Hello")
        * literal!int(" ")
        * literal!int("world");
    assert(whole.match(greeting));
}

/* Always fails. Useful to terminate 'many'. */
auto
fail(B, S = string)()
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            return toParse.fail;
        }
    }
    return new Res();
}
unittest
{
    string input = "foo";
    auto never = fail!string;

    assert(!never.match(input));
}

/* Always succeeds with an empty 'parsed'. Useful if the first thing in the
   chain you want to do is to build value and you dislike (relative)
   clunkiness of 'build' parser.
   */
auto
succeed(B, S = string)()
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            return toParse.succeed("");
        }
    }
    return new Res();
}
unittest
{
    string input = "foo";
    assert(succeed!string.match(input));
}

/* Fails if given condition returns false, succeeds consuming no input otherwise. 
*/
auto
test(B, S = string)(bool delegate (B, const S) tst)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* The condition sees the value built so far and the previous
               parser's 'parsed'. */
            if (!tst(toParse.value, toParse.parsed))
                return toParse.fail;
            return toParse.succeed("");
        }
    }
    return new Res();
}
unittest
{
    import std.conv;

    string str = "12";
    auto state = ParserState!int(str);

    auto p = literal!int("12")
        % ((res, i) => to!int(i))
        / test!int((res, s) => res > 5);
    auto res = p.run(state);
    assert(res.success);
}

/* Builds a value from previous parser's output: the value becomes
   'dg(value, parsed)' and 'parsed' is emptied. Always succeeds. */
auto
build(B, S = string)(B delegate (B, const S) dg)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            auto res = toParse;
            res.value = dg(toParse.value, toParse.parsed);
            res.parsed = "";
            return res;
        }
    }
    return new Res();
}
unittest
{
    string str = "foo";
    auto state = ParserState!int(str);

    auto p = build!int((res, s) => 10);
    auto res = p.run(state);
    assert(res.success);
    assert(res.value == 10);
}

/* Makes a copy of '.parsed' and '.left'. Can be used to drop a long string
   from the memory if only a small portion of it is used. Always succeeds.

   The copied 'parsed' is kept in the resulting state, so chain this parser
   with '/' rather than '*': with concatenation the previous parser's
   'parsed' would end up in the result twice.
   */
auto
force(B, S = string)()
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            auto res = toParse;
            res.left = toParse.left.dup;
            res.parsed = toParse.parsed.dup;
            /* Plain 'succeed' keeps the fresh copy of 'parsed'; passing a
               string here would throw that copy away. */
            return res.succeed;
        }
    }
    return new Res();
}
unittest
{
    auto p = literal!int("foo") / force!int;
    auto res = p.run("foobar");
    assert(res.success);
    assert(res.parsed == "foo");
    assert(res.left == "bar");
}

/* Uses the same parser between 'min' and 'max' times. If either of 'min' and
   'max' is negative, there's no limit on corresponding allowed amount of
   times. 
Value is passed from each run to the next one, with resulting
   parser inheriting the value from the last run. 'parsed' of the result is
   the concatenation of what every successful run has parsed. A run that
   fails leaves no trace: the resulting state (value and remaining input) is
   the one left by the last successful run.
   */
auto
many(B, S = string)(int min, int max, Parser!(B, S) p)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            S parsed;
            /* State after the most recent successful run. */
            ParserState!(B, S) last = toParse.succeed;
            size_t n = 0;

            /* Check required minimum of successful parses. */
            while (min > 0 && n < min) {
                last = p.run(last);
                if (!last.success) return toParse.fail;
                n++;
                parsed ~= last.parsed;
            }

            /* Parse the rest. A failed run may have consumed input before
               failing (e.g. when 'p' is a chain), so its state is dropped
               rather than patched up. */
            while (max < 0 || n < max) {
                auto next = p.run(last);
                if (!next.success)
                    break;
                last = next;
                n++;
                parsed ~= next.parsed;
            }
            return last.succeed(parsed);
        }
    }
    return new Res();
}
unittest
{
    import std.conv;

    string str1 = "123 12 13";
    string str2 = "foo bar";
    auto s1 = ParserState!int(str1);
    auto s2 = ParserState!int(str2);
    auto digit = singleChar!int(ch => '0' <= ch && ch <= '9');

    auto p1 = many(1, -1, digit) % (res, i) => to!int(i);
    auto p2 = many(1, -1, digit % (res, i) => res * 10 + to!int(i));

    auto res1_1 = p1.run(s1);
    assert(res1_1.success);
    assert(res1_1.value == 123);
    auto res1_2 = p1.run(s2);
    assert(!res1_2.success);

    auto res2_1 = p2.run(s1);
    assert(res2_1.success);
    assert(res2_1.value == 123);
    auto res2_2 = p2.run(s2);
    assert(!res2_2.success);
}
unittest
{
    /* Input consumed by a run that eventually fails is given back. */

    auto item = literal!int("1") * literal!int(",");
    auto res = many(0, -1, item).run("1,1,1");
    assert(res.success);
    assert(res.parsed == "1,1,");
    assert(res.left == "1");
}

/* Uses a subparser and absorbs its built value into main chain's one by
   passing it through a given delegate. 
*/
auto
absorb(B, B2, S = string)(B delegate (B, B2, const S) dg, Parser!(B2, S) subparser)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* The subparser builds its own kind of value, so it starts from
               a fresh state that only shares the remaining input. */
            auto subResult = subparser.parse(ParserState!(B2, S)(toParse.left));
            if (!subResult.success)
                return toParse.fail;
            return toParse.absorb(subResult, dg);
        }
    }
    return new Res();
}
unittest
{
    import std.conv;

    string input = "12 25";

    auto digit = literal!int("1") | literal!int("2") | literal!int("5");
    auto number = many(1, -1, digit) % (int i, string s) => to!int(s);

    auto pair =
        absorb!(int[], int)((l, i, s) => [i], number)
        / literal!(int[])(" ")
        / absorb!(int[], int)((l, i, s) => l ~ i, number);
    auto outcome = pair.run(input);
    assert(outcome.success);
    assert(outcome.value == [12, 25]);
}

/* Run previous parser's parsed string through a function and substitute the
   result for parsed string. Consumes no input and always succeeds.
   */
auto
morph(B, S = string)(S delegate (const S) dg)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            S morphed = dg(toParse.parsed);
            return toParse.succeed(morphed);
        }
    }
    return new Res();
}
unittest
{
    import std.string;

    string input = "FOO bar";
    auto start = ParserState!string(input);
    auto lowered = literal!string("FOO")
        / morph!string(s => s.toLower);
    auto outcome = lowered.run(start);
    assert(outcome.success);
    assert(outcome.parsed == "foo");
}

/* Parses a single character if it passes a given test. 
*/
auto
singleChar(B, C = char)(bool delegate (C) test)
    if (isSomeChar!C)
{
    alias S = immutable(C)[];
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* Nothing to look at. */
            if (toParse.left.length == 0)
                return toParse.fail;

            C first = toParse.left[0];
            if (!test(first))
                return toParse.fail;

            S taken = [first];
            auto res = toParse;
            res.left = res.left[1 .. $];
            return res.succeed(taken);
        }
    }
    return new Res();
}
unittest
{
    import std.conv;

    string input = "123 12";
    auto start = ParserState!int(input);

    auto digit = singleChar!int(c => '0' <= c && c <= '9');
    auto number = many(1, -1, digit) % (i, s) => to!int(s);
    auto outcome = number.run(start);
    assert(outcome.success);
    assert(outcome.value == 123);
}

/* Throws an exception if the parser chain is in the success state. The parser
   is not oblivious, so it is simply skipped when the chain has failed. */
auto
throwOnSuccess(B, S = string)(Exception exc)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* 'parse' is reached only with a successful state. */
            throw exc;
        }
    }
    return new Res();
}
unittest
{
    import std.exception;

    string matching = "foobar";
    string mismatching = "fooBAR";
    auto p = literal!int("foo") /
        literal!int("bar") /
        throwOnSuccess!int(new Exception("Parse succesful"));

    assertThrown(p.match(matching));
    assertNotThrown(p.match(mismatching));
}

/* Throws an exception if the parser chain is in the failed state. 
*/
auto
throwOnFailure(B, S = string)(Exception exc)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        /* Has to be oblivious to get a chance to run after a failure. */
        this() { oblivious_ = true; }

        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* A recovered state means the chain had failed before this
               parser was reached. */
            if (!toParse.recovered)
                return toParse.succeed;
            throw exc;
        }
    }
    return new Res();
}
unittest
{
    import std.exception;

    string matching = "foobar";
    string mismatching = "fooBAR";
    auto p = literal!int("foo") /
        literal!int("bar") /
        throwOnFailure!int(new Exception("Parse failed"));

    assertNotThrown(p.match(matching));
    assertThrown(p.match(mismatching));
}

/* Throws an exception, whether the chain has failed or not. */
auto
throwAnyway(B, S = string)(Exception exc)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        /* Oblivious, so that a failed chain does not skip it. */
        this() { oblivious_ = true; }

        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            throw exc;
        }
    }
    return new Res();
}
unittest
{
    import std.exception;

    string matching = "foobar";
    string mismatching = "fooBAR";
    auto p = literal!int("foo") /
        literal!int("bar") /
        throwAnyway!int(new Exception("Throwing in any case"));

    assertThrown(p.match(matching));
    assertThrown(p.match(mismatching));
}

/* Parses the rest of the input. 
*/
auto
everything(B, S = string)()
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* All that is left becomes 'parsed'. */
            auto drained = toParse;
            drained.parsed = toParse.left;
            drained.left = "";
            return drained.succeed;
        }
    }
    return new Res();
}
unittest
{
    string input = "foobar";

    auto start = ParserState!int(input);

    auto all = everything!int;
    auto tail = literal!int("foo") / everything!int;

    auto allRes = all.run(start);
    assert(allRes.success);
    assert(allRes.parsed == "foobar");

    auto tailRes = tail.run(start);
    assert(tailRes.success);
    assert(tailRes.parsed == "bar");
}

/* Catches an exception that might occur in another parser. If an exception is
   thrown inside 'main' parser, it is considered failed and 'onException'
   parser is run on the original input. */
auto
except(E, B, S = string)(Parser!(B, S) main, Parser!(B, S) onException)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            try
                return main.run(toParse);
            catch (E)
                /* The fallback starts from the state 'main' was given. */
                return onException.run(toParse);
        }
    }
    return new Res();
}
unittest
{
    import std.conv;

    string good = "12";
    string bad = "12d";

    auto goodState = ParserState!int(good);
    auto badState = ParserState!int(bad);

    auto base = everything!int % (res, s) => s.to!int;

    auto guarded = base.except!(ConvException, int)(build!int((res, s) => 0));

    auto goodRes = guarded.run(goodState);
    assert(goodRes.success);
    assert(goodRes.value == 12);

    auto badRes = guarded.run(badState);
    assert(badRes.success);
    assert(badRes.value == 0);
}

/* Catches an exception that might occur in another parser. If such an
   exception is thrown, this overload fails. 
*/
auto
except(E, B, S = string)(Parser!(B, S) main)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            try
                return main.run(toParse);
            catch (E)
                return toParse.fail;
        }
    }
    return new Res();
}
unittest
{
    import std.conv;

    string good = "12";
    string bad = "12d";

    auto goodState = ParserState!int(good);
    auto badState = ParserState!int(bad);

    auto base = everything!int % (res, s) => s.to!int;

    auto guarded = base.except!(ConvException, int)();

    auto goodRes = guarded.run(goodState);
    assert(goodRes.success);
    assert(goodRes.value == 12);

    auto badRes = guarded.run(badState);
    assert(!badRes.success);
}

/* ---------- conditional parsers ---------- */

/* Be extra careful with the following parsers: they always succeed and are not
   guaranteed to consume any input. This can lead to infinite loops.
   */

/* Parses characters while a condition is met. With 'keepTerminator' on, the
   first character that does not meet the condition (if there is one) is
   parsed as well. */
auto
charWhile(B, C = char)(bool delegate (C) test, bool keepTerminator = true)
    if (isSomeChar!C)
{
    alias S = immutable(C)[];
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            S input = toParse.left;

            /* Index of the first character that fails the test. */
            size_t cut = 0;
            for (; cut < input.length; cut++) {
                if (!test(input[cut])) break;
            }
            if (keepTerminator && cut < input.length)
                cut++;

            auto res = toParse;
            res.parsed = input[0 .. cut];
            res.left = input[cut .. $];
            return res.succeed;
        }
    }
    return new Res();
}
unittest
{
    string input = "foo bar";
    auto start = ParserState!string(input);

    auto word = charWhile!string(c => c != ' ', false);
    auto first = word.run(start);
    assert(first.parsed == "foo");
    assert(first.left == " bar");
    /* Notice that the following does succeed, but with empty string parsed. */
    auto second = word.run(first);
    assert(second.parsed == "");
}

/* Parses characters until a condition is met. */
auto
charUntil(B, C = char)(bool delegate (C) test, bool keepTerminator = true)
    if (isSomeChar!C)
{
    /* Same as charWhile with the condition negated. */
    return charWhile!(B, C)(c => !test(c), keepTerminator);
}

/* Uses the same parser while a condition is met. The condition function takes
   (in order) currently built value, parsed string and iteration (0-based).
   The result is the state after the last run that both succeeded and met the
   condition. Always succeeds.
   */
auto
repeatWhile(B, S = string)(bool delegate (B, const S, int) test, Parser!(B, S) p)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            if (!toParse.success) return toParse.fail;

            /* The last state that has been accepted. */
            auto accepted = toParse;
            for (int n = 0; ; n++) {
                auto attempt = p.parse(accepted);
                if (!attempt.success || !test(attempt.value, attempt.parsed, n))
                    break;
                accepted = attempt;
            }

            return accepted.succeed;
        }
    }
    return new Res();
}
unittest
{
    import std.conv;

    auto input = "12345";
    auto start = ParserState!int(input);
    auto digit = singleChar!int(c => '0' <= c && c <= '9')
        % (res, s) => res * 10 + to!int(s);

    auto firstThree = repeatWhile!int((res, s, i) => i < 3, digit);
    auto threeRes = firstThree.run(start);
    assert(threeRes.success);
    assert(threeRes.value == 123);

    auto belowHundred = repeatWhile!int((res, s, i) => res < 100, digit);
    auto hundredRes = belowHundred.run(start);
    assert(hundredRes.success);
    assert(hundredRes.value == 12);
}

/* Uses the same parser until a condition is met. The condition function is the
   same as for 'repeatWhile'.
   */
auto
repeatUntil(B, S = string)(bool delegate (B, const S, int) test, Parser!(B, S) p)
    if (isSomeString!S)
{
    /* Same as repeatWhile with the condition negated. */
    return repeatWhile!(B, S)((b, s, i) => !test(b, s, i), p);
}