/* Extras module

   Provides some nice-to-have, often useful things.

   It publicly imports 'core', so there's no need to import it if you use
   extras.

   */

module parsed.extras;

import std.range;
import std.traits;

public import parsed.core;

/* ---------- single-character parsers ---------- */

/* A single whitespace character. Newline characters ('\n' and '\r') are
   accepted only when 'acceptNewline' is true.

   Note: on Windows machines EOL is two chars, so there'll be leftover chars
   after this parses away the first char in the pair.
   */
auto
whitespace(B, C = char)(bool acceptNewline = false)
    if (isSomeChar!C)
{
    import std.uni : isWhite;

    /* Predicate handed to 'singleChar'; it closes over 'acceptNewline'. */
    bool accepts(C c)
    {
        immutable isLineBreak = c == '\n' || c == '\r';
        return c.isWhite && (acceptNewline || !isLineBreak);
    }
    return singleChar!(B, C)(&accepts);
}
unittest
{
    string spaced = "foo bar";
    string multiline = "foo\nbar";
    string glued = "foobar";

    /* Default: newlines are not whitespace. */
    auto strict = literal!string("foo")
        / whitespace!string;
    assert(strict.match(spaced));
    assert(!strict.match(multiline));
    assert(!strict.match(glued));

    /* Newlines explicitly allowed. */
    auto lenient = literal!string("foo")
        / whitespace!string(true);
    assert(lenient.match(spaced));
    assert(lenient.match(multiline));
    assert(!lenient.match(glued));
}

/* A single character that is not whitespace. */
auto
nonwhite(B, C = char)()
    if (isSomeChar!C)
{
    import std.uni : isWhite;
    return singleChar!(B, C)(ch => !isWhite(ch));
}
unittest
{
    string input = "foo bar";
    auto initial = ParserState!string(input);
    auto chunk = many(1, -1, nonwhite!string);

    auto first = chunk.run(initial);
    assert(first.success);
    assert(first.parsed == "foo");

    /* The next character is a space, so a second run must fail. */
    auto second = chunk.run(first);
    assert(!second.success);
}

/* An alphanumeric char.
*/
/* NOTE(review): the single-character parsers below test one code unit of type
   C at a time; for 'char' input this presumably means multi-byte UTF-8
   sequences are classified byte by byte -- confirm against 'singleChar' in
   core.
   */
auto
alnum(B, C = char)()
    if (isSomeChar!C)
{
    import std.uni;
    return singleChar!(B, C)(c => c.isAlphaNum);
}
unittest
{
    string str = "foo12";
    auto state = ParserState!string(str);

    auto p = many(1, -1, alnum!string);
    auto res = p.run(state);
    assert(res.success);
    assert(res.parsed == "foo12");
}

/* An alphabetic char. */
auto
alpha(B, C = char)()
    if (isSomeChar!C)
{
    import std.uni;
    return singleChar!(B, C)(c => c.isAlpha);
}
unittest
{
    string str = "foo12";
    auto state = ParserState!string(str);

    auto p = many(1, -1, alpha!string);
    auto res = p.run(state);
    assert(res.success);
    assert(res.parsed == "foo");
}

/* A decimal digit, i.e. one of '0' .. '9'.

   Uses std.ascii.isDigit rather than std.uni.isNumber: the latter accepts
   every Unicode numeric character (superscripts, fractions, Roman numerals,
   ...), which is not what "decimal digit" means.
   */
auto
digit(B, C = char)()
    if (isSomeChar!C)
{
    import std.ascii : isDigit;
    return singleChar!(B, C)(c => c.isDigit);
}
unittest
{
    string str = "123f";
    auto state = ParserState!string(str);

    auto p = many(1, -1, digit!string);
    auto res = p.run(state);
    assert(res.success);
    assert(res.parsed == "123");
}

/* A hexadecimal digit: '0' .. '9', 'a' .. 'f' or 'A' .. 'F'.

   std.ascii.isHexDigit covers exactly this set; the previous implementation
   accepted any Unicode numeric character in place of '0' .. '9'.
   */
auto
hexdigit(B, C = char)()
    if (isSomeChar!C)
{
    import std.ascii : isHexDigit;
    return singleChar!(B, C)(c => c.isHexDigit);
}
unittest
{
    string str = "1aFg";
    auto state = ParserState!string(str);

    auto p = many(1, -1, hexdigit!string);
    auto res = p.run(state);
    assert(res.success);
    assert(res.parsed == "1aF");
}

/* A newline character: either '\n' or '\r'. A Windows-style "\r\n" pair
   therefore counts as two newlines. */
auto
newline(B, C = char)()
    if (isSomeChar!C)
{
    return singleChar!(B, C)(ch => ch == '\n' || ch == '\r');
}
unittest
{
    string str = "foo\nbar";

    auto p = literal!string("foo")
        * newline!string
        * literal!string("bar");
    assert(p.match(str));
}

/* ---------- multi-character combinations of the above ---------- */

/* Parses a whole line (with or without terminating newline). Note that
   'keepTerminator' option only affects '.parsed', the terminating newline is
   always removed from input. Always succeeds.
*/
/* The return type is spelled out because the two branches below yield
   different concrete parser types, so 'auto' cannot be used here. */
Parser!(B, immutable(C)[])
line(B, C = char)(bool keepTerminator)
    if (isSomeChar!C)
{
    import std.string;
    alias Str = immutable(C)[];

    auto wholeLine = charUntil!(B, C)(c => c == '\n' || c == '\r', true);
    if (!keepTerminator)
        return wholeLine / morph!(B, Str)(text => text.chomp);
    return wholeLine;
}
unittest
{
    string input = "foo\nbar";
    auto initial = ParserState!string(input);
    auto lineParser = line!string(false);

    auto first = lineParser.run(initial);
    assert(first.success);
    assert(first.parsed == "foo");

    /* The last line has no terminator and is still parsed. */
    auto second = lineParser.run(first);
    assert(second.success);
    assert(second.parsed == "bar");
}

/* One or more whitespace characters. Newlines count as whitespace only when
   'acceptNewlines' is true. */
auto
someWhite(B, C = char)(bool acceptNewlines = false)
    if (isSomeChar!C)
{
    auto oneWhite = whitespace!(B, C)(acceptNewlines);
    return many(1, -1, oneWhite);
}
unittest
{
    string spaced = "foo bar";
    string tabbed = "foo\tbar";
    string glued = "foobar";
    string multiline = "foo \n\nbar";

    auto strict = literal!int("foo") / someWhite!int(false) / literal!int("bar");
    auto lenient = literal!int("foo") / someWhite!int(true) / literal!int("bar");

    assert(strict.match(spaced));
    assert(strict.match(tabbed));
    assert(!strict.match(glued));
    assert(!strict.match(multiline));

    assert(lenient.match(spaced));
    assert(lenient.match(tabbed));
    assert(!lenient.match(glued));
    assert(lenient.match(multiline));
}

/* Parses zero or more whitespace characters.
*/
/* Newlines count as whitespace only when 'acceptNewlines' is true. Because
   zero repetitions are allowed, the resulting parser is expected to succeed
   even when no whitespace is present (see the unittest below). */
auto
maybeWhite(B, C = char)(bool acceptNewlines = false)
    if (isSomeChar!C)
{
    return many(0, -1, whitespace!(B, C)(acceptNewlines));
}
unittest
{
    string str1 = "foo bar";
    string str2 = "foo\tbar";
    string str3 = "foobar";
    string str4 = "foo \n\nbar";

    auto p1 = literal!int("foo") / maybeWhite!int(false) / literal!int("bar");
    auto p2 = literal!int("foo") / maybeWhite!int(true) / literal!int("bar");

    assert(p1.match(str1));
    assert(p1.match(str2));
    assert(p1.match(str3));
    assert(!p1.match(str4));

    assert(p2.match(str1));
    assert(p2.match(str2));
    assert(p2.match(str3));
    assert(p2.match(str4));
}

/* Parses several newline characters, but no less than one. */
auto
someNewlines(B, C = char)()
    if (isSomeChar!C)
{
    return many(1, -1, newline!(B, C));
}
unittest
{
    string str1 = "foo\n\rbar";
    auto p1 = literal!int("foo")
        / someNewlines!int
        / literal!int("bar");
    assert (p1.match(str1));
}

/* Parses zero or more newline characters. */
auto
maybeNewlines(B, C = char)()
    if (isSomeChar!C)
{
    return many(0, -1, newline!(B, C));
}
unittest
{
    string str1 = "foo\n\rbar";
    string str2 = "foobar";
    /* This test used to exercise 'someNewlines' by mistake, leaving
       'maybeNewlines' untested. */
    auto p1 = literal!int("foo")
        / maybeNewlines!int
        / literal!int("bar");
    assert (p1.match(str1));
    /* Zero newlines are acceptable too. */
    assert (p1.match(str2));
}

/* Selects which class of characters 'word' accepts. */
enum Word
{
    any,    /* any non-whitespace character */
    alnum,  /* alphanumeric characters only */
    alpha   /* alphabetic characters only */
}
/* Parses either a sequence of non-whitespace characters (if given Word.any) or
   a sequence of alphanumeric characters (if given Word.alnum) or a sequence of
   alphabetic characters (if given Word.alpha) of length within given bounds.
   Any of the bounds can be negative, which means no limit on the corresponding
   end.
296 */ 297 auto 298 word(B, C = char)(Word type, int minLength = 1, int maxLength = -1) 299 if (isSomeChar!C) 300 { 301 final switch (type) { 302 case Word.any: 303 return many(minLength, maxLength, nonwhite!(B, C)); 304 case Word.alnum: 305 return many(minLength, maxLength, alnum!(B, C)); 306 case Word.alpha: 307 return many(minLength, maxLength, alpha!(B, C)); 308 } 309 } 310 unittest 311 { 312 string str = "foo12( bar1 "; 313 auto state = ParserState!string(str); 314 315 auto p1 = word!string(Word.any); 316 auto res1 = p1.run(state); 317 assert(res1.success); 318 assert(res1.parsed == "foo12("); 319 320 auto p2 = word!string(Word.alnum); 321 auto res2 = p2.run(state); 322 assert(res2.success); 323 assert(res2.parsed == "foo12"); 324 325 auto p3 = word!string(Word.alpha); 326 auto res3 = p3.run(state); 327 assert(res3.success); 328 assert(res3.parsed == "foo"); 329 } 330 331 /* Parses a number. */ 332 auto 333 number(B, C = char)() 334 if (isSomeChar!C) 335 { 336 return many(1, -1, digit!(B, C)); 337 } 338 unittest 339 { 340 string str1 = "12 12"; 341 string str2 = "12f"; 342 string str3 = "foo"; 343 auto state1 = ParserState!string(str1); 344 auto state2 = ParserState!string(str2); 345 346 auto p = number!string; 347 auto res1 = p.run(state1); 348 assert(res1.success); 349 assert(res1.parsed == "12"); 350 351 auto res2 = p.run(state2); 352 assert(res2.success); 353 assert(res2.parsed == "12"); 354 355 assert(!p.match(str3)); 356 } 357 358 /* Parses a hexadecimal number. The number may or may not be prefixed by '0x'. 359 The prefix will *not* appear in '.parsed'. 360 */ 361 auto 362 hexnum(B, C = char)() 363 if (isSomeChar!C) 364 { 365 alias S = immutable(C)[]; 366 return maybe(literal!S("0x")) / many(1, -1, hexdigit!C); 367 } 368 369 /* ---------- misc ---------- */ 370 371 /* Parses something one or zero times. 
*/
auto
maybe(B, S = string)(Parser!(B, S) p)
    if (isSomeString!S)
{
    /* "Optional" is simply "between zero and one repetitions". */
    enum atLeast = 0;
    enum atMost = 1;
    return many(atLeast, atMost, p);
}
unittest
{
    string spaced = "foo bar";
    string glued = "foobar";
    string banged = "foo!bar";

    auto parser = literal!string("foo")
        / maybe(whitespace!string)
        / literal!string("bar");

    assert(parser.match(spaced));
    assert(parser.match(glued));
    assert(!parser.match(banged));
}

/* Parses text enclosed in a balanced pair of the given characters. The input
   must start with 'left'. Nested pairs are tracked, and the parser fails if
   the input ends before the outermost pair is closed. '.parsed' contains the
   enclosing pair only if 'keepPair' is true.
   */
auto
balanced(B, C = char)(C left, C right, bool keepPair = false)
    if (isSomeChar!C)
{
    alias S = immutable(C)[];
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* A pair needs at least two characters, and the first one has
               to be the opening character. */
            if (!toParse.success
                    || toParse.left.length < 2
                    || toParse.left[0] != left)
                return toParse.fail;

            auto input = toParse.left;
            int depth = 1;
            size_t pos = 1;
            for (; depth != 0 && pos < input.length; pos++) {
                immutable C ch = input[pos];
                /* 'right' is tested first on purpose: this is what allows
                   the same character to be used for both ends. */
                if (ch == right)
                    depth--;
                else if (ch == left)
                    depth++;
            }

            /* Ran out of input with the pair still open. */
            if (depth != 0) return toParse.fail;

            auto res = toParse;
            res.parsed = keepPair ? input[0 .. pos] : input[1 .. pos - 1];
            if (pos < input.length)
                res.left = res.left[pos .. $];
            else
                res.left = [];
            return res.succeed;
        } /* parse */
    } /* Res */
    return new Res();
}
unittest
{
    string flat = "(abcdef)";
    string nestedTail = "(ab(df))";
    string nestedHead = "((asdf)d)";
    string trailing = "(asdf)f";
    string unclosed = "(asdff";
    string slashes = "/asdf/";
    auto stFlat = ParserState!int(flat);
    auto stNestedTail = ParserState!int(nestedTail);
    auto stNestedHead = ParserState!int(nestedHead);
    auto stTrailing = ParserState!int(trailing);
    auto stUnclosed = ParserState!int(unclosed);
    auto stSlashes = ParserState!int(slashes);
    auto parens = balanced!int('(', ')');
    auto sameChar = balanced!int('/', '/');

    auto rFlat = parens.run(stFlat);
    assert(rFlat.success);
    assert(rFlat.parsed == "abcdef");

    auto rNestedTail = parens.run(stNestedTail);
    assert(rNestedTail.success);
    assert(rNestedTail.parsed == "ab(df)");

    auto rNestedHead = parens.run(stNestedHead);
    assert(rNestedHead.success);
    assert(rNestedHead.parsed == "(asdf)d");

    auto rTrailing = parens.run(stTrailing);
    assert(rTrailing.success);
    assert(rTrailing.parsed == "asdf");

    auto rUnclosed = parens.run(stUnclosed);
    assert(!rUnclosed.success);

    auto rSlashes = sameChar.run(stSlashes);
    assert(rSlashes.success);
    assert(rSlashes.parsed == "asdf");
}

/* Parses text between balanced pair of bits that match given parsers. 'left'
   and 'right' parsers are going to be run many times, so be careful with
   building inside them.
*/
/* The input must start with something 'left' matches. '.parsed' contains the
   enclosing pair only if 'keepPair' is true. Lengths are taken from the
   '.parsed' of each 'left'/'right' run, so both parsers are expected to
   consume exactly what they report as parsed.
   */
auto
balanced(B, S = string)(Parser!(B, S) left, Parser!(B, S) right, bool keepPair = false)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            if (!toParse.success) return toParse.fail;

            auto cur = toParse;
            cur = left.run(cur);
            if (!cur.success) return toParse.fail;

            int level = 1;
            /* Length of the opening delimiter. */
            size_t start = cur.parsed.length;
            /* Number of characters of 'toParse.left' consumed so far. */
            size_t parsed = start;
            /* Length of the most recent closing delimiter. */
            size_t lastRightLen;
            size_t len = toParse.left.length;
            while (level != 0 && parsed < len) {
                /* Note the order. It allows using same parsers for left and
                   right.
                   */
                auto maybeRight = right.run(cur);
                if (maybeRight.success) {
                    level--;
                    size_t rightLen = maybeRight.parsed.length;
                    parsed += rightLen;
                    lastRightLen = rightLen;
                    cur = maybeRight;
                    continue;
                }
                auto maybeLeft = left.run(cur);
                if (maybeLeft.success) {
                    level++;
                    parsed += maybeLeft.parsed.length;
                    cur = maybeLeft;
                    continue;
                }
                /* Neither delimiter starts here: skip one character. */
                parsed++;
                cur.left = cur.left[1 .. $];
            } /* while level != 0 */
            if (level != 0) return toParse.fail;
            auto res = toParse;
            res.left = cur.left;
            if (keepPair)
                res.parsed = toParse.left[0 .. parsed];
            else
                res.parsed = toParse.left[start .. parsed - lastRightLen];
            return res.succeed;
        } /* parse */
    } /* Res */
    return new Res();
}
unittest
{
    string str1 = "foo 1 2 3 bar";
    string str2 = "foo 1 2 3";
    string str3 = "foo 1 2 3 foo";
    auto s1 = ParserState!int(str1);
    auto s3 = ParserState!int(str3);
    auto p = balanced!int(literal!int("foo"), literal!int("bar"), false);
    auto p2 = balanced!int(literal!int("foo"), literal!int("foo"), false);

    auto res1 = p.run(s1);
    assert(res1.success);
    assert(res1.parsed == " 1 2 3 ");

    assert(!p.match(str2));

    auto res3 = p2.run(s3);
    assert(res3.success);
    assert(res3.parsed == " 1 2 3 ");
}

/* Parses text until a given parser succeeds. Fails if nothing matches the
   parser; the parser is not tried once the input is exhausted.

   The two flags are independent:
     'keepTerminator'    - whether the text matched by 'parser' is appended to
                           '.parsed';
     'consumeTerminator' - whether the text matched by 'parser' is removed
                           from the input (true) or left there for the next
                           parser (false).
   */
auto
upTo(B, S = string)(
        Parser!(B, S) parser,
        bool keepTerminator = false,
        bool consumeTerminator = true)
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            if (!toParse.success) return toParse.fail;

            auto cur = toParse;
            /* Number of characters skipped before the terminator. */
            size_t parsed = 0;
            while (cur.left.length > 0) {
                auto maybeDone = parser.run(cur);
                if (maybeDone.success) {
                    size_t finish = parsed;
                    if (keepTerminator) finish += maybeDone.parsed.length;
                    /* 'cur.left' still starts at the terminator; restoring
                       it puts the terminator back into the input. */
                    if (!consumeTerminator) maybeDone.left = cur.left;
                    return maybeDone.succeed(toParse.left[0 .. finish]);
                }
                parsed++;
                cur.left = cur.left[1 .. $];
            }
            return toParse.fail;
        }
    }
    return new Res();
}
unittest
{
    string str1 = "foo bar! baz";
    string str2 = "foo bar";
    auto s1 = ParserState!int(str1);
    auto s2 = ParserState!int(str2);
    auto p = upTo(literal!int("!"));

    auto res1 = p.run(s1);
    assert(res1.success);
    assert(res1.parsed == "foo bar");
    assert(res1.left == " baz");

    auto res2 = p.run(s2);
    assert(!res2.success);

    /* Terminator kept in '.parsed' and removed from the input. */
    auto pKeep = upTo(literal!int("!"), true);
    auto res3 = pKeep.run(s1);
    assert(res3.success);
    assert(res3.parsed == "foo bar!");
    assert(res3.left == " baz");

    /* Terminator left in the input. */
    auto pLeave = upTo(literal!int("!"), false, false);
    auto res4 = pLeave.run(s1);
    assert(res4.success);
    assert(res4.parsed == "foo bar");
    assert(res4.left == "! baz");
}

/* Behaves just like the 'literal' parser from core, but takes a range of
   strings as patterns. Patterns are tried in the order given and the first
   one the input starts with wins. With 'caseSensitive' set to false both the
   patterns and the input are lower-cased before comparison, and '.parsed'
   holds the lower-cased pattern.

   The range is read once, when the parser is built, so single-pass input
   ranges are fine. */
auto
multiliteral(B, S = string, R)(R range, bool consumeInput = true, bool caseSensitive = true)
    if (isSomeString!S && isInputRange!R && is(Unqual!(ElementType!R): S))
{
    import std.algorithm;
    import std.string;

    /* Materialise (and, if needed, lower-case) the patterns up front instead
       of re-evaluating a lazy range on every parse. */
    S[] literals;
    foreach (x; range)
        literals ~= caseSensitive ? x : x.toLower;

    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            if (!toParse.success) return toParse.fail;
            /* Same for every pattern, so computed once per parse. */
            S checkAgainst = caseSensitive ? toParse.left : toParse.left.toLower;
            foreach (literal; literals) {
                if (checkAgainst.startsWith(literal)) {
                    if (consumeInput)
                        toParse.left = toParse.left[literal.length .. $];
                    return toParse.succeed(literal);
                }
            }
            return toParse.fail;
        }
    }
    return new Res();
}
unittest
{
    import std.algorithm;
    import std.string;

    string str1 = "foo";
    string str2 = "bar";
    string str3 = "BAR";

    auto s1 = ParserState!int(str1);
    auto s2 = ParserState!int(str2);
    auto s3 = ParserState!int(str3);

    auto p1 = multiliteral!int(["foo", "bar"]);
    auto p2 = multiliteral!int(["foo", "bar"].map!(x => x.toUpper));

    auto res1_1 = p1.run(s1);
    auto res1_2 = p1.run(s2);
    auto res1_3 = p1.run(s3);

    assert(res1_1.success);
    assert(res1_1.parsed == "foo");

    assert(res1_2.success);
    assert(res1_2.parsed == "bar");

    assert(!res1_3.success);

    auto res2_1 = p2.run(s1);
    auto res2_2 = p2.run(s2);
    auto res2_3 = p2.run(s3);

    assert(!res2_1.success);
    assert(!res2_2.success);
    assert(res2_3.success);
    assert(res2_3.parsed == "BAR");

    /* Case-insensitive matching works in both directions. */
    auto p3 = multiliteral!int(["FOO", "bar"], true, false);
    assert(p3.run(s1).success);
    assert(p3.run(s3).success);
}

/* Succeeds only if the input is empty. Consumes nothing and sets '.parsed'
   to an empty string. */
auto
endOfInput(B, S = string)()
    if (isSomeString!S)
{
    class Res: Parser!(B, S)
    {
        override ParserState!(B, S) parse(ParserState!(B, S) toParse)
        {
            /* A state that has already failed must not be revived just
               because its remaining input happens to be empty. */
            if (!toParse.success) return toParse.fail;
            if (toParse.left.length == 0)
                return toParse.succeed("");
            else
                return toParse.fail;
        }
    }
    return new Res;
}
unittest
{
    string str1 = "foobar";
    string str2 = "foo";

    auto s1 = ParserState!int(str1);
    auto s2 = ParserState!int(str2);

    auto p1 = literal!int("foo")
        * endOfInput!int;

    auto res1_1 = p1.run(s1);
    assert(!res1_1.success);

    auto res1_2 = p1.run(s2);
    assert(res1_2.success);
    assert(res1_2.parsed == "foo");
}