/*
 * Hunt - A high-level D Programming Language Web framework that encourages rapid development and clean, pragmatic design.
 *
 * Copyright (C) 2015-2019, HuntLabs
 *
 * Website: https://www.huntlabs.net/
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module hunt.framework.view.Lexer;


private
{
    import hunt.framework.view.Exception : TemplateException;

    import std.conv : to;
    import std.traits : EnumMembers;
    import std.utf;
    import std.range;
}


/// Kinds of tokens produced by `Lexer.nextToken`.
enum Type
{
    Unknown,
    Raw,
    Keyword,
    Operator,

    StmtBegin,
    StmtEnd,
    ExprBegin,
    ExprEnd,
    CmntBegin,
    CmntEnd,
    CmntInline,

    Ident,
    Integer,
    Float,
    Boolean,
    String,

    LParen,
    RParen,
    LSParen,
    RSParen,
    LBrace,
    RBrace,

    Dot,
    Comma,
    Colon,

    EOL,
    EOF,
}


/// Reserved words of the template language; each member's value is its spelling.
enum Keyword : string
{
    Unknown = "",
    For = "for",
    Recursive = "recursive",
    EndFor = "endfor",
    If = "if",
    ElIf = "elif",
    Else = "else",
    EndIf = "endif",
    Block = "block",
    EndBlock = "endblock",
    Extends = "extends",
    Macro = "macro",
    EndMacro = "endmacro",
    Return = "return",
    Call = "call",
    EndCall = "endcall",
    Filter = "filter",
    EndFilter = "endfilter",
    With = "with",
    EndWith = "endwith",
    Set = "set",
    EndSet = "endset",
    Ignore = "ignore",
    Missing = "missing",
    Import = "import",
    From = "from",
    As = "as",
    Without = "without",
    Context = "context",
    Include = "include",
}

/// Returns `true` if `kw` is one of the keywords listed below.
bool isBeginingKeyword(Keyword kw)
{
    import std.algorithm : among;

    return cast(bool)kw.among(
                Keyword.If,
                Keyword.Set,
                Keyword.For,
                Keyword.Block,
                Keyword.Extends,
                Keyword.Macro,
                Keyword.Call,
                Keyword.Filter,
                Keyword.With,
                Keyword.Include,
                Keyword.Import,
                Keyword.From,
        );
}

/// Maps a spelling to its `Keyword`; returns `Keyword.Unknown` when `key` is not a keyword.
Keyword toKeyword(string key)
{
    switch (key) with (Keyword)
    {
        // One `case` per enum member, generated at compile time.
        static foreach(member; EnumMembers!Keyword)
        {
            case member:
                return member;
        }
        default :
            return Unknown;
    }
}


/// Returns `true` if `key` spells a keyword other than `Keyword.Unknown`.
bool isKeyword(string key)
{
    return key.toKeyword != Keyword.Unknown;
}


/// Returns `true` for the literals `true`, `false`, `True` and `False`.
bool isBoolean(string key)
{
    return key == "true" || key == "false" ||
           key == "True" || key == "False";
}


/// Operators of the template language; each member's value is its spelling.
enum Operator : string
{
    // The first in order is the first in priority.
    // The lexer tries the symbolic operators in this order, so a longer
    // operator must stay ahead of any operator that is its prefix
    // ("==" before "=", "//" before "/", "**" before "*").

    Eq = "==",
    NotEq = "!=",
    LessEq = "<=",
    GreaterEq = ">=",
    Less = "<",
    Greater = ">",

    And = "and",
    Or = "or",
    Not = "not",

    In = "in",
    Is = "is",

    Assign = "=",
    Filter = "|",
    Concat = "~",

    Plus = "+",
    Minus = "-",

    DivInt = "//",
    DivFloat = "/",
    Rem = "%",
    Pow = "**",
    Mul = "*",
}


/// Maps a spelling to its `Operator`; returns `cast(Operator)""` when `key` is not an operator.
Operator toOperator(string key)
{
    switch (key) with (Operator)
    {
        static foreach(member; EnumMembers!Operator)
        {
            case member:
                return member;
        }
        default :
            return cast(Operator)"";
    }
}

/// Returns `true` if `key` is the spelling of any `Operator` member.
bool isOperator(string key)
{
    switch (key) with (Operator)
    {
        // All generated labels are empty and share the single `return true` below.
        static foreach(member; EnumMembers!Operator)
        {
            case member:
        }
            return true;
        default :
            return false;
    }
}

/// Returns `true` if `op` is one of the six comparison operators.
bool isCmpOperator(Operator op)
{
    import std.algorithm : among;

    return cast(bool)op.among(
            Operator.Eq,
            Operator.NotEq,
            Operator.LessEq,
            Operator.GreaterEq,
            Operator.Less,
            Operator.Greater
        );
}


/// Compile-time predicate: `true` if the spelling of `op` contains at least
/// one alphanumeric character (e.g. `and`, `in`), `false` for purely symbolic operators.
bool isIdentOperator(Operator op)()
{
    import std.algorithm : filter;
    import std.uni : isAlphaNum;

    static if (!(cast(string)op).filter!isAlphaNum.empty)
        return true;
    else
        return false;
}


/// Location in a template source.
struct Position
{
    string filename;
    ulong line, column;

    /// Formats the position as `filename(line,column)`.
    string toString()
    {
        return filename ~ "(" ~ line.to!string ~ "," ~ column.to!string ~ ")";
    }
}


/// A lexeme: its kind, its text (may be empty) and where it starts.
struct Token
{
    /// Ready-made end-of-file token with an empty position.
    enum EOF = Token(Type.EOF, Position("", 0, 0));

    Type type;
    string value;
    Position pos;

    /// Creates a token without text.
    this (Type t, Position p)
    {
        type = t;
        pos = p;
    }

    /// Creates a token with text `v`.
    this(Type t, string v, Position p)
    {
        type = t;
        value = v;
        pos = p;
    }

    /// `token == Type.X` compares only the token kind.
    bool opEquals(Type type){
        return this.type == type;
    }

    /// `token == Keyword.X` is true for a keyword token with that spelling.
    bool opEquals(Keyword kw){
        return this.type == Type.Keyword && value == kw;
    }

    /// `token == Operator.X` is true for an operator token with that spelling.
    bool opEquals(Operator op){
        return this.type == Type.Operator && value == op;
    }
}


/**
 * Template lexer parameterised by its delimiters.
 *
 * Params:
 *   exprOpBegin  = opens an expression
 *   exprOpEnd    = closes an expression
 *   stmtOpBegin  = opens a statement
 *   stmtOpEnd    = closes a statement
 *   cmntOpBegin  = opens a block comment
 *   cmntOpEnd    = closes a block comment
 *   stmtOpInline = starts an inline statement, which ends at a new line
 *   cmntOpInline = starts an inline comment, which ends at a new line
 */
struct Lexer(
        string exprOpBegin, string exprOpEnd,
        string stmtOpBegin, string stmtOpEnd,
        string cmntOpBegin, string cmntOpEnd,
        string stmtOpInline, string cmntOpInline)
{
    static assert(exprOpBegin.length, "Expression begin operator can't be empty");
    static assert(exprOpEnd.length, "Expression end operator can't be empty");

    static assert(stmtOpBegin.length, "Statement begin operator can't be empty");
    static assert(stmtOpEnd.length, "Statement end operator can't be empty");

    static assert(cmntOpBegin.length, "Comment begin operator can't be empty");
    static assert(cmntOpEnd.length, "Comment end operator can't be empty");

    static assert(stmtOpInline.length, "Statement inline operator can't be empty");
    static assert(cmntOpInline.length, "Comment inline operator can't be empty");

    //TODO check uniq


    enum stmtInline = stmtOpInline;

    /// Sentinel returned by `front`, `next` and `pop` when the input is exhausted.
    /// It has the same value as the character U+00FF, so end of input is
    /// detected with `atEnd` and never by comparing a character with `EOF`.
    enum EOF = 255;

    private
    {
        Position _beginPos;
        bool _isReadingRaw; // State of reading raw data
        bool _isInlineStmt; // State of reading inline statement
        string _str;        // Not yet consumed part of the input
        string _filename;
        ulong _line, _column;
    }

    /**
     * Creates a lexer over `str`.
     *
     * Params:
     *   str      = template source
     *   filename = name stored in the `Position` of every token
     */
    this(string str, string filename = "")
    {
        _str = str;
        _isReadingRaw = true;
        _isInlineStmt = false;
        _filename = filename;
        _line = 1;
        _column = 1;
    }

    /**
     * Consumes and returns the next token.
     *
     * Once the input is exhausted every call returns a token of type `Type.EOF`.
     * Characters that fit no rule are returned one at a time as `Type.Unknown`.
     */
    Token nextToken()
    {
        _beginPos = position();

        // Try to read raw data
        if (_isReadingRaw)
        {
            auto raw = skipRaw();
            _isReadingRaw = false;
            if (raw.length)
                return Token(Type.Raw, raw, _beginPos);
        }

        skipWhitespaces();
        _beginPos = position();

        // Check inline statement end. An inline comment also ends the statement;
        // it is left in the input and lexed by the following call.
        if (_isInlineStmt &&
            (tryToSkipNewLine() || cmntOpInline == sliceOp!cmntOpInline))
        {
            _isInlineStmt = false;
            _isReadingRaw = true;
            return Token(Type.StmtEnd, "\n", _beginPos);
        }

        // Allow multiline inline statements with '\'
        while (_isInlineStmt && front == '\\')
        {
            pop();
            if (!tryToSkipNewLine())
                return Token(Type.Unknown, "\\", _beginPos);

            skipWhitespaces();
            _beginPos = position();
        }

        // Check begin operators
        if (exprOpBegin == sliceOp!exprOpBegin)
        {
            skipOp!exprOpBegin;
            return Token(Type.ExprBegin, exprOpBegin, _beginPos);
        }
        if (stmtOpBegin == sliceOp!stmtOpBegin)
        {
            skipOp!stmtOpBegin;
            return Token(Type.StmtBegin, stmtOpBegin, _beginPos);
        }
        if (cmntOpBegin == sliceOp!cmntOpBegin)
        {
            skipOp!cmntOpBegin;
            // The comment body is dropped; the closing operator is lexed by the next call.
            skipComment();
            return Token(Type.CmntBegin, cmntOpBegin, _beginPos);
        }

        // Check end operators
        if (exprOpEnd == sliceOp!exprOpEnd)
        {
            _isReadingRaw = true;
            skipOp!exprOpEnd;
            return Token(Type.ExprEnd, exprOpEnd, _beginPos);
        }
        if (stmtOpEnd == sliceOp!stmtOpEnd)
        {
            _isReadingRaw = true;
            skipOp!stmtOpEnd;
            return Token(Type.StmtEnd, stmtOpEnd, _beginPos);
        }
        if (cmntOpEnd == sliceOp!cmntOpEnd)
        {
            _isReadingRaw = true;
            skipOp!cmntOpEnd;
            return Token(Type.CmntEnd, cmntOpEnd, _beginPos);
        }

        // Check begin inline operators
        if (cmntOpInline == sliceOp!cmntOpInline)
        {
            skipInlineComment();
            _isReadingRaw = true;
            return Token(Type.CmntInline, cmntOpInline, _beginPos);
        }
        if (stmtOpInline == sliceOp!stmtOpInline)
        {
            skipOp!stmtOpInline;
            _isInlineStmt = true;
            return Token(Type.StmtBegin, stmtOpInline, _beginPos);
        }

        // Trying to read non-ident operators
        static foreach(op; EnumMembers!Operator)
        {
            static if (!isIdentOperator!op)
            {
                if (cast(string)op == sliceOp!op)
                {
                    skipOp!op;
                    return Token(Type.Operator, op, _beginPos);
                }
            }
        }

        // End of file. Tested on the remaining length, not on `front`, so that
        // a literal U+00FF in the input is not mistaken for the end.
        if (atEnd)
            return Token(Type.EOF, _beginPos);

        // Check remainings
        switch (front)
        {
            // Identifier or keyword
            case 'a': .. case 'z':
            case 'A': .. case 'Z':
            case '_':
                auto ident = popIdent();
                if (ident.toKeyword != Keyword.Unknown)
                    return Token(Type.Keyword, ident, _beginPos);
                else if (ident.isBoolean)
                    return Token(Type.Boolean, ident, _beginPos);
                else if (ident.isOperator)
                    return Token(Type.Operator, ident, _beginPos);
                else
                    return Token(Type.Ident, ident, _beginPos);

            // Integer or float
            case '0': .. case '9':
                return popNumber();

            // String
            case '"':
            case '\'':
                return Token(Type.String, popString(), _beginPos);

            case '(': return Token(Type.LParen, popChar, _beginPos);
            case ')': return Token(Type.RParen, popChar, _beginPos);
            case '[': return Token(Type.LSParen, popChar, _beginPos);
            case ']': return Token(Type.RSParen, popChar, _beginPos);
            case '{': return Token(Type.LBrace, popChar, _beginPos);
            case '}': return Token(Type.RBrace, popChar, _beginPos);
            case '.': return Token(Type.Dot, popChar, _beginPos);
            case ',': return Token(Type.Comma, popChar, _beginPos);
            case ':': return Token(Type.Colon, popChar, _beginPos);

            default:
                return Token(Type.Unknown, popChar, _beginPos);
        }
    }


private:


    /// `true` when the whole input has been consumed.
    bool atEnd()
    {
        return _str.length == 0;
    }


    /// Current character without consuming it, or `EOF` at end of input.
    dchar front()
    {
        if (_str.length > 0)
            return _str.front;
        else
            return EOF;
    }


    /// Character after the current one, or `EOF` if fewer than two characters remain.
    dchar next()
    {
        // Work on a copy of the slice: `_str` itself is not advanced and nothing is allocated.
        auto rest = _str;
        if (rest.empty)
            return EOF;
        rest.popFront();
        if (rest.empty)
            return EOF;
        return rest.front;
    }

    /// Consumes and returns the current character (or `EOF` at end of input),
    /// keeping `_line` and `_column` up to date.
    dchar pop()
    {
        if (_str.length > 0)
        {
            auto ch = _str.front;

            // For "\r\n" the line counter is advanced by the '\n', not by the '\r'.
            if (ch.isNewLine && !(ch == '\r' && next == '\n'))
            {
                _line++;
                _column = 1;
            }
            else
                _column++;

            _str.popFront();
            return ch;
        }
        else
            return EOF;
    }


    /// Consumes the current character and returns it as a string.
    string popChar()
    {
        return pop.to!string;
    }


    /// Returns the prefix of the remaining input that has as many UTF-8 code
    /// units as `op`, or the whole remainder if it is not longer than `op`.
    /// Comparing the result with `op` tells whether the input starts with `op`.
    string sliceOp(string op)()
    {
        // Slice bounds are code units, so use `length` and not the code point count.
        enum length = op.length;

        if (length >= _str.length)
            return _str;
        else
            return _str[0 .. length];
    }


    /// Consumes `op` from the front of the input. The caller must have checked
    /// with `sliceOp` that the input starts with `op`.
    void skipOp(string op)()
    {
        enum length = op.length;       // code units to drop from the input
        enum columns = op.walkLength;  // code points, i.e. columns to advance

        if (length >= _str.length)
            _str = "";
        else
            _str = _str[length .. $];
        _column += columns;
    }


    /// Position of the next unread character.
    Position position()
    {
        return Position(_filename, _line, _column);
    }


    /// Skips white space and new lines. Inside an inline statement it stops at
    /// a new line, because the new line terminates the statement.
    void skipWhitespaces()
    {
        while (true)
        {
            if (front.isWhiteSpace)
            {
                pop();
                continue;
            }

            if (isFronNewLine)
            {
                // Return for handling NL as StmtEnd
                if (_isInlineStmt)
                    return;
                tryToSkipNewLine();
                continue;
            }

            return;
        }
    }


    /// Consumes and returns the longest run of ASCII letters, digits and underscores.
    string popIdent()
    {
        auto start = _str;

        while (true)
        {
            switch(front)
            {
                case 'a': .. case 'z':
                case 'A': .. case 'Z':
                case '0': .. case '9':
                case '_':
                    pop();
                    break;
                default:
                    // Everything consumed since `start` is the identifier.
                    return start[0 .. start.length - _str.length];
            }
        }
    }


    /// Consumes a number and returns it as an `Integer` token, or as a `Float`
    /// token if it contains a '.'. Underscores are consumed but left out of the
    /// token text; a second '.' ends the number and stays in the input.
    Token popNumber()
    {
        auto type = Type.Integer;
        string number = "";

        while (true)
        {
            switch (front)
            {
                case '0': .. case '9':
                    number ~= pop();
                    break;
                case '.':
                    if (type == Type.Integer)
                    {
                        type = Type.Float;
                        number ~= pop();
                    }
                    else
                        return Token(type, number, _beginPos);
                    break;
                case '_':
                    pop();
                    break;
                default:
                    return Token(type, number, _beginPos);
            }
        }
    }


    /// Consumes a quoted string and returns its content without the quotes.
    /// The opening quote is the current character; the same character closes
    /// the string. `\n`, `\r` and `\t` are translated, any other escaped
    /// character stands for itself. An unterminated string ends at end of input.
    string popString()
    {
        auto quote = pop();
        string str = "";

        while (!atEnd)
        {
            if (front == '\\')
            {
                pop();
                // A backslash that is the last character of the input is dropped.
                if (!atEnd)
                {
                    auto escaped = pop();
                    switch (escaped)
                    {
                        case 'n': str ~= '\n'; break;
                        case 'r': str ~= '\r'; break;
                        case 't': str ~= '\t'; break;
                        default:  str ~= escaped; break;
                    }
                }
                continue;
            }

            if (front == quote)
            {
                pop();
                return str;
            }

            str ~= pop();
        }

        return str;
    }


    /// Consumes raw template text up to, but not including, the next begin or
    /// inline operator (or the end of input) and returns it.
    string skipRaw()
    {
        auto start = _str;

        while (!atEnd)
        {
            if (exprOpBegin == sliceOp!exprOpBegin
                || stmtOpBegin == sliceOp!stmtOpBegin
                || cmntOpBegin == sliceOp!cmntOpBegin
                || stmtOpInline == sliceOp!stmtOpInline
                || cmntOpInline == sliceOp!cmntOpInline)
                break;

            pop();
        }

        // The raw text is exactly the consumed prefix; no copy is needed.
        return start[0 .. start.length - _str.length];
    }


    /// Consumes input up to, but not including, the comment end operator.
    void skipComment()
    {
        while (!atEnd)
        {
            if (cmntOpEnd == sliceOp!cmntOpEnd)
                return;
            pop();
        }
    }


    /// Consumes an inline comment up to the next '\n'. The '\n' itself is
    /// consumed only if the comment started in column 1.
    void skipInlineComment()
    {
        auto column = _column;

        while (!atEnd)
        {
            if (front == '\n')
            {
                // Eat new line if whole line is comment
                if (column == 1)
                    pop();
                return;
            }
            pop();
        }
    }


    /// `true` if the current character is a new line character.
    bool isFronNewLine()
    {
        return front.isNewLine;
    }

    /// true if NL was skipped; "\r\n" is consumed as a single new line
    bool tryToSkipNewLine()
    {
        switch (front)
        {
            case '\r':
                pop();
                if (front == '\n')
                    pop();
                return true;

            case '\n':
            case 0x2028:
            case 0x2029:
                pop();
                return true;

            default:
                return false;
        }
    }
}


/// Returns `true` for space, tab and the Unicode space characters listed below.
/// New line characters are not white space here; see `isNewLine`.
bool isWhiteSpace(dchar ch)
{
    return ch == ' ' || ch == '\t' || ch == 0x205F || ch == 0x202F || ch == 0x3000
           || ch == 0x00A0 || (ch >= 0x2002 && ch <= 0x200B);
}

/// Returns `true` for CR, LF, U+2028 (line separator) and U+2029 (paragraph separator).
bool isNewLine(dchar ch)
{
    return ch == '\r' || ch == '\n' || ch == 0x2028 || ch == 0x2029;
}