1 /*
2  * Hunt - A high-level D Programming Language Web framework that encourages rapid development and clean, pragmatic design.
3  *
4  * Copyright (C) 2015-2019, HuntLabs
5  *
6  * Website: https://www.huntlabs.net/
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module hunt.framework.view.Lexer;
13 
14 
15 private
16 {
17     import hunt.framework.view.Exception : TemplateException;
18 
19     import std.conv : to;
20     import std.traits : EnumMembers;
21     import std.utf;
22     import std.range;
23 }
24 
25 
/// Kinds of tokens carried in `Token.type`.
enum Type
{
    Unknown,    // Unrecognised character, or a stray '\' inside an inline statement
    Raw,        // Verbatim template text found outside of any delimiter
    Keyword,    // Identifier whose spelling matches a `Keyword` member
    Operator,   // Symbolic or word operator (see `Operator`)

    StmtBegin,  // Statement opening delimiter or inline statement prefix
    StmtEnd,    // Statement closing delimiter, or the line end of an inline statement
    ExprBegin,  // Expression opening delimiter
    ExprEnd,    // Expression closing delimiter
    CmntBegin,  // Comment opening delimiter (the comment body is skipped by the lexer)
    CmntEnd,    // Comment closing delimiter
    CmntInline, // Inline comment (its text is skipped by the lexer)

    Ident,      // Identifier that is neither a keyword, a boolean nor a word operator
    Integer,    // Run of digits without '.'
    Float,      // Run of digits containing a single '.'
    Boolean,    // "true", "false", "True" or "False"
    String,     // Quoted string literal; the token value holds the unescaped content

    LParen,     // '('
    RParen,     // ')'
    LSParen,    // '['
    RSParen,    // ']'
    LBrace,     // '{'
    RBrace,     // '}'

    Dot,        // '.'
    Comma,      // ','
    Colon,      // ':'

    EOL,        // NOTE(review): not emitted by Lexer.nextToken in this module
    EOF,        // End of input
}
61 
62 
/// Reserved words of the template language; each member's value is its spelling
/// in template source. `toKeyword` maps a spelling back to its member.
enum Keyword : string
{
    Unknown = "", // Result of `toKeyword` for a string that is not a keyword
    For = "for",
    Recursive = "recursive",
    EndFor = "endfor",
    If = "if",
    ElIf = "elif",
    Else = "else",
    EndIf = "endif",
    Block = "block",
    EndBlock = "endblock",
    Extends = "extends",
    Macro = "macro",
    EndMacro = "endmacro",
    Return = "return",
    Call = "call",
    EndCall = "endcall",
    Filter = "filter",
    EndFilter = "endfilter",
    With = "with",
    EndWith = "endwith",
    Set = "set",
    EndSet = "endset",
    Ignore = "ignore",
    Missing = "missing",
    Import = "import",
    From = "from",
    As = "as",
    Without = "without",
    Context = "context",
    Include = "include",
}
96 
/// Returns true when `kw` belongs to the fixed set of "beginning" keywords
/// listed in the body (if, set, for, block, extends, macro, call, filter,
/// with, include, import, from).
bool isBeginingKeyword(Keyword kw)
{
    import std.algorithm : among;

    // `among` yields the 1-based index of the matching value, or 0 if none matches.
    with (Keyword)
        return kw.among(If, Set, For, Block, Extends, Macro,
                        Call, Filter, With, Include, Import, From) != 0;
}
116 
/// Maps a spelling to the `Keyword` member with that value.
/// Returns `Keyword.Unknown` when `key` is not a keyword (or is empty).
Keyword toKeyword(string key)
{
    // Unrolled at compile time into one comparison per enum member.
    static foreach (candidate; EnumMembers!Keyword)
    {
        if (key == candidate)
            return candidate;
    }

    return Keyword.Unknown;
}
130 
131 
/// Returns true when `key` is the spelling of a keyword other than `Keyword.Unknown`.
bool isKeyword(string key)
{
    const resolved = toKeyword(key);
    return resolved != Keyword.Unknown;
}
136 
137 
/// Returns true for the four accepted boolean spellings:
/// "true", "false", "True" and "False".
bool isBoolean(string key)
{
    switch (key)
    {
        case "true", "false", "True", "False":
            return true;
        default:
            return false;
    }
}
143 
144 
/// Operators of the template language; each member's value is its spelling.
enum Operator : string
{
    // The first in order is the first in priority.
    // The lexer tries the non-identifier operators in declaration order, so an
    // operator must be declared before any operator that is a prefix of it
    // ("==" before "=", "<=" before "<", "//" before "/", "**" before "*").

    Eq = "==",
    NotEq = "!=",
    LessEq = "<=",
    GreaterEq = ">=",
    Less = "<",
    Greater = ">",

    And = "and",
    Or = "or",
    Not = "not",

    In = "in",
    Is = "is",

    Assign = "=",
    Filter = "|",
    Concat = "~",

    Plus = "+",
    Minus = "-",

    DivInt = "//",
    DivFloat = "/",
    Rem = "%",
    Pow = "**",
    Mul = "*",
}
176 
177 
/// Maps a spelling to the `Operator` member with that value.
/// Returns an `Operator` holding the empty string when `key` is not an operator.
Operator toOperator(string key)
{
    // Unrolled at compile time into one comparison per enum member.
    static foreach (candidate; EnumMembers!Operator)
    {
        if (key == candidate)
            return candidate;
    }

    return cast(Operator)"";
}
191 
/// Returns true when `key` is the spelling of any `Operator` member.
bool isOperator(string key)
{
    switch (key)
    {
        // One generated case per operator spelling, each answering directly.
        static foreach (candidate; EnumMembers!Operator)
        {
            case candidate:
                return true;
        }

        default:
            return false;
    }
}
205 
/// Returns true when `op` is one of the six comparison operators
/// (==, !=, <=, >=, <, >).
bool isCmpOperator(Operator op)
{
    import std.algorithm : among;

    // `among` yields the 1-based index of the matching value, or 0 if none matches.
    with (Operator)
        return op.among(Eq, NotEq, LessEq, GreaterEq, Less, Greater) != 0;
}
219 
220 
/// Compile-time predicate: true when the spelling of `op` contains at least
/// one alphanumeric character (e.g. "and", "in"), false for purely symbolic
/// operators (e.g. "==", "|").
bool isIdentOperator(Operator op)()
{
    import std.algorithm : filter;
    import std.uni : isAlphaNum;

    // Evaluated once per instantiation at compile time.
    enum bool hasAlphaNum = !(cast(string)op).filter!isAlphaNum.empty;
    return hasAlphaNum;
}
231 
232 
/// Location inside a template source: file name plus line and column numbers.
struct Position
{
    string filename;
    ulong line, column;

    /// Formats the position as `filename(line,column)`.
    /// Declared `const` so it can also be called on const/immutable positions.
    string toString() const
    {
        return filename ~ "(" ~ line.to!string ~ "," ~ column.to!string ~ ")";
    }
}
243 
244 
/// A lexical token: its kind, its text (when it has one) and the position
/// where it starts.
struct Token
{
    /// Ready-made end-of-file token with an empty position.
    enum EOF = Token(Type.EOF, Position("", 0, 0));

    Type type;
    string value;
    Position pos;

    /// Creates a token without text; `value` keeps its default (empty) state.
    this (Type t, Position p)
    {
        type = t;
        pos = p;
    }

    /// Creates a token of kind `t` with text `v` starting at `p`.
    this(Type t, string v, Position p)
    {
        type = t;
        value = v;
        pos = p;
    }

    // The comparisons below are `const` so that const/immutable tokens can be
    // compared as well; they only read the token.

    /// `token == Type.X`: true when the token is of kind `X`.
    bool opEquals(Type type) const
    {
        return this.type == type;
    }

    /// `token == Keyword.X`: true when the token is a keyword spelled like `X`.
    bool opEquals(Keyword kw) const
    {
        return this.type == Type.Keyword && value == kw;
    }

    /// `token == Operator.X`: true when the token is an operator spelled like `X`.
    bool opEquals(Operator op) const
    {
        return this.type == Type.Operator && value == op;
    }
}
278 
279 
/**
 * Lexer for template sources, parameterised at compile time by the delimiter
 * strings of the template syntax: the begin/end markers of expressions,
 * statements and comments, and the prefixes of inline statements and inline
 * comments.
 *
 * The lexer starts in "raw" mode, in which text is collected verbatim into a
 * `Type.Raw` token until one of the begin/inline delimiters is met. It returns
 * to raw mode after an end delimiter, an inline comment, or the end of an
 * inline statement.
 */
struct Lexer(
        string exprOpBegin, string exprOpEnd,
        string stmtOpBegin, string stmtOpEnd,
        string cmntOpBegin, string cmntOpEnd,
        string stmtOpInline, string cmntOpInline)
{
    static assert(exprOpBegin.length, "Expression begin operator can't be empty");
    static assert(exprOpEnd.length, "Expression end operator can't be empty");

    static assert(stmtOpBegin.length, "Statement begin operator can't be empty");
    static assert(stmtOpEnd.length, "Statement end operator can't be empty");

    static assert(cmntOpBegin.length, "Comment begin operator can't be empty");
    static assert(cmntOpEnd.length, "Comment end operator can't be empty");

    static assert(stmtOpInline.length, "Statement inline operator can't be empty");
    static assert(cmntOpInline.length, "Comment inline operator can't be empty");

    //TODO check uniq


    /// The inline statement prefix this lexer was instantiated with.
    enum stmtInline = stmtOpInline;

    /// Value returned by `front`/`pop`/`next` when there is no more input.
    /// It equals the code point U+00FF, so end-of-input decisions inside the
    /// lexer are made with `atEnd` rather than by comparing against it.
    enum EOF = 255;

    private
    {
        Position _beginPos; // Start position of the token being read
        bool _isReadingRaw; // State of reading raw data
        bool _isInlineStmt; // State of reading inline statement
        string _str;        // Remaining, not yet consumed input
        string _filename;   // File name reported in token positions
        ulong _line, _column; // Current 1-based line and column
    }

    /// Creates a lexer over `str`; `filename` is only used in token positions.
    this(string str, string filename = "")
    {
        _str = str;
        _isReadingRaw = true;
        _isInlineStmt = false;
        _filename = filename;
        _line = 1;
        _column = 1;
    }

    /**
     * Consumes and returns the next token.
     *
     * Returns a `Type.EOF` token once the input is exhausted; characters that
     * match nothing yield a `Type.Unknown` token holding that character.
     */
    Token nextToken()
    {
        _beginPos = position();

        // Try to read raw data
        if (_isReadingRaw)
        {
            auto raw = skipRaw();
            _isReadingRaw = false;
            if (raw.length)
                return Token(Type.Raw, raw, _beginPos);
        }

        skipWhitespaces();
        _beginPos = position();

        // Check inline statement end: a line break or the start of an inline
        // comment closes the statement. The comment itself is not consumed
        // here; it is tokenized by a following call.
        if (_isInlineStmt &&
            (tryToSkipNewLine() || cmntOpInline == sliceOp!cmntOpInline))
        {
            _isInlineStmt = false;
            _isReadingRaw = true;
            return Token(Type.StmtEnd, "\n", _beginPos);
        }

        // Allow multiline inline statements with '\'
        while (true)
        {
            if (_isInlineStmt && front == '\\')
            {
                pop();
                // A backslash that is not directly followed by a line break
                // is reported as an unknown token.
                if (!tryToSkipNewLine())
                    return Token(Type.Unknown, "\\", _beginPos);
            }
            else
                break;

            skipWhitespaces();
            _beginPos = position();
        }

        // Check begin operators
        if (exprOpBegin == sliceOp!exprOpBegin)
        {
            skipOp!exprOpBegin;
            return Token(Type.ExprBegin, exprOpBegin, _beginPos);
        }
        if (stmtOpBegin == sliceOp!stmtOpBegin)
        {
            skipOp!stmtOpBegin;
            return Token(Type.StmtBegin, stmtOpBegin, _beginPos);
        }
        if (cmntOpBegin == sliceOp!cmntOpBegin)
        {
            skipOp!cmntOpBegin;
            // The comment body is dropped; the closing delimiter is left in
            // the input and produces a CmntEnd token on a following call.
            skipComment();
            return Token(Type.CmntBegin, cmntOpBegin, _beginPos);
        }

        // Check end operators
        if (exprOpEnd == sliceOp!exprOpEnd)
        {
            _isReadingRaw = true;
            skipOp!exprOpEnd;
            return Token(Type.ExprEnd, exprOpEnd, _beginPos);
        }
        if (stmtOpEnd == sliceOp!stmtOpEnd)
        {
            _isReadingRaw = true;
            skipOp!stmtOpEnd;
            return Token(Type.StmtEnd, stmtOpEnd, _beginPos);
        }
        if (cmntOpEnd == sliceOp!cmntOpEnd)
        {
            _isReadingRaw = true;
            skipOp!cmntOpEnd;
            return Token(Type.CmntEnd, cmntOpEnd, _beginPos);
        }

        // Check begin inline operators
        if (cmntOpInline == sliceOp!cmntOpInline)
        {
            skipInlineComment();
            _isReadingRaw = true;
            return Token(Type.CmntInline, cmntOpInline, _beginPos);
        }
        if (stmtOpInline == sliceOp!stmtOpInline)
        {
            skipOp!stmtOpInline;
            _isInlineStmt = true;
            return Token(Type.StmtBegin, stmtOpInline, _beginPos);
        }

        // Trying to read non-ident operators, in declaration order
        static foreach(op; EnumMembers!Operator)
        {
            static if (!isIdentOperator!op)
            {
                if (cast(string)op == sliceOp!op)
                {
                    skipOp!op;
                    return Token(Type.Operator, op, _beginPos);
                }
            }
        }

        // End of file. Tested on the input itself rather than on the value
        // of `front`, so that a literal U+00FF character is not mistaken for
        // the end of input.
        if (atEnd)
            return Token(Type.EOF, _beginPos);

        // Check remaining cases
        switch (front)
        {
            // Identifier or keyword
            case 'a': .. case 'z':
            case 'A': .. case 'Z':
            case '_':
                auto ident = popIdent();
                if (ident.toKeyword != Keyword.Unknown)
                    return Token(Type.Keyword, ident, _beginPos);
                else if (ident.isBoolean)
                    return Token(Type.Boolean, ident, _beginPos);
                else if (ident.isOperator)
                    return Token(Type.Operator, ident, _beginPos);
                else
                    return Token(Type.Ident, ident, _beginPos);

            // Integer or float
            case '0': .. case '9':
                return popNumber();

            // String
            case '"':
            case '\'':
                return Token(Type.String, popString(), _beginPos);

            case '(': return Token(Type.LParen, popChar, _beginPos);
            case ')': return Token(Type.RParen, popChar, _beginPos);
            case '[': return Token(Type.LSParen, popChar, _beginPos);
            case ']': return Token(Type.RSParen, popChar, _beginPos);
            case '{': return Token(Type.LBrace, popChar, _beginPos);
            case '}': return Token(Type.RBrace, popChar, _beginPos);
            case '.': return Token(Type.Dot, popChar, _beginPos);
            case ',': return Token(Type.Comma, popChar, _beginPos);
            case ':': return Token(Type.Colon, popChar, _beginPos);

            default:
                return Token(Type.Unknown, popChar, _beginPos);
        }
    }


private:


    /// True when the whole input has been consumed.
    bool atEnd() const
    {
        return _str.length == 0;
    }


    /// Current character without consuming it, or `EOF` at end of input.
    dchar front()
    {
        if (_str.length > 0)
            return _str.front;
        else
            return EOF;
    }


    /// Character after the current one without consuming anything, or `EOF`
    /// when fewer than two characters remain.
    dchar next()
    {
        // Walk a copy of the slice; the lexer state is left untouched and
        // nothing is allocated.
        auto rest = _str;
        if (rest.length == 0)
            return EOF;

        rest.popFront();
        if (rest.length == 0)
            return EOF;

        return rest.front;
    }

    /// Consumes and returns the current character (or returns `EOF` at end
    /// of input), keeping the line/column counters up to date.
    dchar pop()
    {
        if (_str.length > 0)
        {
            auto ch  = _str.front;

            // The '\r' of a "\r\n" pair does not start a new line by itself;
            // the line counter advances on the following '\n'.
            if (ch.isNewLine && !(ch == '\r' && next == '\n'))
            {
                _line++;
                _column = 1;
            }
            else
                _column++;

            _str.popFront();
            return ch;
        }
        else
            return EOF;
    }


    /// Consumes the current character and returns it as a string.
    string popChar()
    {
        return pop.to!string;
    }


    /// Returns the leading part of the input that has the same size as `op`
    /// (or the whole remaining input if it is shorter), for comparison with `op`.
    string sliceOp(string op)()
    {
        // `_str` is sliced by UTF-8 code units, so the size of `op` must be
        // measured in code units too (not in code points).
        enum size = op.length;

        if (size >= _str.length)
            return _str;
        else
            return _str[0 .. size];
    }


    /// Drops `op` from the front of the input and advances the column.
    /// The caller must have checked with `sliceOp` that the input starts with `op`.
    void skipOp(string op)()
    {
        enum size = op.length;        // code units to drop from the input
        enum columns = op.walkLength; // characters the column advances by

        if (size >= _str.length)
            _str = "";
        else
            _str = _str[size .. $];
        _column += columns;
    }


    /// Current position (file name, line, column) of the lexer.
    Position position()
    {
        return Position(_filename, _line, _column);
    }


    /// Skips blanks and line breaks. Inside an inline statement line breaks
    /// are left in place, because they terminate the statement.
    void skipWhitespaces()
    {
        while (true)
        {
            if (front.isWhiteSpace)
            {
                pop();
                continue;
            }

            if (isFronNewLine)
            {
                // Return for handling NL as StmtEnd
                if (_isInlineStmt)
                    return;
                tryToSkipNewLine();
                continue;
            }

            return;
        }
    }


    /// Consumes and returns a run of ASCII letters, digits and underscores.
    string popIdent()
    {
        string ident = "";
        while (true)
        {
            switch(front)
            {
                case 'a': .. case 'z':
                case 'A': .. case 'Z':
                case '0': .. case '9':
                case '_':
                    ident ~= pop();
                    break;
                default:
                    return ident;
            }
        }
    }


    /// Consumes a number and returns it as an Integer token, or as a Float
    /// token when it contains a '.'. Underscores between digits are consumed
    /// but not stored; a second '.' ends the number.
    Token popNumber()
    {
        auto type = Type.Integer;
        string number = "";

        while (true)
        {
            switch (front)
            {
                case '0': .. case '9':
                    number ~= pop();
                    break;
                case '.':
                    if (type == Type.Integer)
                    {
                        type = Type.Float;
                        number ~= pop();
                    }
                    else
                        return Token(type, number, _beginPos);
                    break;
                case '_':
                    pop();
                    break;
                default:
                    return Token(type, number, _beginPos);
            }
        }
    }


    /// Consumes a quoted string and returns its content without the quotes.
    /// The escapes \n, \r and \t are translated; any other escaped character
    /// stands for itself. An unterminated string ends at the end of input.
    string popString()
    {
        auto quote = pop(); // opening quote, also the expected closing one
        string str = "";

        while (true)
        {
            if (atEnd)
                return str;

            if (front == '\\')
            {
                pop();
                if (!atEnd)
                {
                    auto escaped = pop();
                    switch (escaped)
                    {
                        case 'n': str ~= '\n'; break;
                        case 'r': str ~= '\r'; break;
                        case 't': str ~= '\t'; break;
                        default: str ~= escaped; break;
                    }
                }
                continue;
            }

            if (front == quote)
            {
                pop();
                return str;
            }

            str ~= pop();
        }
    }


    /// Consumes and returns raw text up to (not including) the next begin or
    /// inline delimiter, or up to the end of input.
    string skipRaw()
    {
        string raw = "";

        while (true)
        {
            if (atEnd)
                return raw;

            if (exprOpBegin == sliceOp!exprOpBegin)
                return raw;
            if (stmtOpBegin == sliceOp!stmtOpBegin)
                return raw;
            if (cmntOpBegin == sliceOp!cmntOpBegin)
                return raw;
            if (stmtOpInline == sliceOp!stmtOpInline)
                return raw;
            if (cmntOpInline == sliceOp!cmntOpInline)
                return raw;

            raw ~= pop();
        }
    }


    /// Drops input up to (not including) the comment end delimiter, or up to
    /// the end of input if the comment is not closed.
    void skipComment()
    {
        while (!atEnd)
        {
            if (cmntOpEnd == sliceOp!cmntOpEnd)
                return;
            pop();
        }
    }


    /// Drops an inline comment up to the next '\n' or the end of input.
    void skipInlineComment()
    {
        // Column at which the comment starts
        auto column = _column;

        while (!atEnd)
        {
            if (front == '\n')
            {
                // Eat new line if whole line is comment
                if (column == 1)
                    pop();
                return;
            }
            pop();
        }
    }


    /// True when the current character is a line break character.
    bool isFronNewLine()
    {
        return front.isNewLine;
    }

    /// Consumes one line break ("\r\n" counts as one) if the input starts
    /// with it. Returns true if a line break was skipped.
    bool tryToSkipNewLine()
    {
        switch (front)
        {
            case '\r':
                pop();
                if (front == '\n')
                    pop();
                return true;

            case '\n':
            case 0x2028:
            case 0x2029:
                pop();
                return true;

            default:
                return false;
        }
    }
}
751 
752 
/// Returns true for horizontal blanks: space, tab, U+00A0, U+2002..U+200B,
/// U+202F, U+205F and U+3000. Line break characters are not included.
bool isWhiteSpace(dchar ch)
{
    switch (ch)
    {
        case ' ':
        case '\t':
        case 0x00A0:
        case 0x202F:
        case 0x205F:
        case 0x3000:
            return true;

        case 0x2002: .. case 0x200B:
            return true;

        default:
            return false;
    }
}
758 
/// Returns true for line break characters: '\r', '\n', U+2028 and U+2029.
bool isNewLine(dchar ch)
{
    switch (ch)
    {
        case '\r':
        case '\n':
        case 0x2028:
        case 0x2029:
            return true;

        default:
            return false;
    }
}