Modules |
Files |
Inheritance Tree |
Inheritance Graph |
Name Index |
Config
File: Synopsis/Parser/C++/occ/token.cc
1| /*
2| Copyright (C) 1997-2000 Shigeru Chiba, University of Tsukuba.
3|
4| Permission to use, copy, distribute and modify this software and
5| its documentation for any purpose is hereby granted without fee,
6| provided that the above copyright notice appear in all copies and that
7| both that copyright notice and this permission notice appear in
8| supporting documentation.
9|
10| Shigeru Chiba makes no representations about the suitability of this
11| software for any purpose. It is provided "as is" without express or
12| implied warranty.
13| */
14| /*
15| Copyright (c) 1995, 1996 Xerox Corporation.
16| All Rights Reserved.
17|
18| Use and copying of this software and preparation of derivative works
19| based upon this software are permitted. Any copy of this software or
20| of any derivative work must include the above copyright notice of
21| Xerox Corporation, this paragraph and the one after it. Any
22| distribution of this software or derivative works must comply with all
23| applicable United States export control laws.
24|
25| This software is made available AS IS, and XEROX CORPORATION DISCLAIMS
26| ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE
27| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28| PURPOSE, AND NOTWITHSTANDING ANY OTHER PROVISION CONTAINED HEREIN, ANY
29| LIABILITY FOR DAMAGES RESULTING FROM THE SOFTWARE OR ITS USE IS
30| EXPRESSLY DISCLAIMED, WHETHER ARISING IN CONTRACT, TORT (INCLUDING
31| NEGLIGENCE) OR STRICT LIABILITY, EVEN IF XEROX CORPORATION IS ADVISED
32| OF THE POSSIBILITY OF SUCH DAMAGES.
33| */
34|
35| #include <cstdlib>
36| #include <cstring>
37| #include <iostream>
38| #include "token.h"
39| #include "hash.h"
40| #include "ptree.h"
41| #include "buffer.h"
42|
43| #if defined(_PARSE_VCC)
44| #define _MSC_VER
45| #endif
46|
47| #if defined(_MSC_VER)
48| #include <assert.h> // for assert in InitializeO
49| #endif
50|
51| extern BOOL regularCpp;
52| static void InitializeOtherKeywords();
53|
54| #ifdef TEST
55|
56| #ifdef __GNUG__
57| #define token(x)
58| #else
59| #define token(x) (
60| #endif
61|
62| #else
63|
64| #define token(x)
65|
66| #endif
67|
68| // class Lex
69|
70| HashTable* Lex::user_keywords = nil;
71| Ptree* Lex::comments = nil;
72|
73| Lex::Lex(Program* prog) : fifo(this)
74| {
75| file = prog;
76| prog->Rewind();
77| last_token = '\n';
78| tokenp = 0;
79| token_len = 0;
80|
81| // Re-init incase used multiple times by Synopsis
82| comments = nil;
83| user_keywords = nil;
84|
85| InitializeOtherKeywords();
86| }
87|
88| char* Lex::Save()
89| {
90| char* pos;
91| int len;
92|
93| fifo.Peek(0, pos, len);
94| return pos;
95| }
96|
97| void Lex::Restore(char* pos)
98| {
99| last_token = '\n';
100| tokenp = 0;
101| token_len = 0;
102| fifo.Clear();
103| Rewind(pos);
104| }
105|
106| // ">>" is either the shift operator or double closing brackets.
107|
108| void Lex::GetOnlyClosingBracket(Token& t)
109| {
110| Restore(t.ptr + 1);
111| }
112|
113| uint Lex::LineNumber(char* pos, char*& ptr, int& len)
114| {
115| return file->LineNumber(pos, ptr, len);
116| }
117|
118| int Lex::GetToken(Token& t)
119| {
120| t.kind = fifo.Pop(t.ptr, t.len);
121| return t.kind;
122| }
123|
124| int Lex::LookAhead(int offset)
125| {
126| return fifo.Peek(offset);
127| }
128|
129| int Lex::LookAhead(int offset, Token& t)
130| {
131| t.kind = fifo.Peek(offset, t.ptr, t.len);
132| return t.kind;
133| }
134|
135| char* Lex::TokenPosition()
136| {
137| return (char*)file->Read(Tokenp());
138| }
139|
140| char Lex::Ref(uint i)
141| {
142| return file->Ref(i);
143| }
144|
145| void Lex::Rewind(char* p)
146| {
147| file->Rewind(p - file->Read(0));
148| }
149|
150| bool Lex::RecordKeyword(char* keyword, int token)
151| {
152| int index;
153| char* str;
154|
155| if(keyword == nil)
156| return FALSE;
157|
158| str = new(GC) char[strlen(keyword) + 1];
159| strcpy(str, keyword);
160|
161| if(user_keywords == nil)
162| user_keywords = new HashTable;
163|
164| if(user_keywords->AddEntry(str, (HashValue)token, &index) >= 0)
165| return TRUE;
166| else
167| return bool(user_keywords->Peek(index) == (HashValue)token);
168| }
169|
170| bool Lex::Reify(Ptree* t, unsigned int& value)
171| {
172| if(t == nil || !t->IsLeaf())
173| return FALSE;
174|
175| char* p = t->GetPosition();
176| int len = t->GetLength();
177| value = 0;
178| if(len > 2 && *p == '0' && is_xletter(p[1])){
179| for(int i = 2; i < len; ++i){
180| char c = p[i];
181| if(is_digit(c))
182| value = value * 0x10 + (c - '0');
183| else if('A' <= c && c <= 'F')
184| value = value * 0x10 + (c - 'A' + 10);
185| else if('a' <= c && c <= 'f')
186| value = value * 0x10 + (c - 'a' + 10);
187| else if(is_int_suffix(c))
188| break;
189| else
190| return FALSE;
191| }
192|
193| return TRUE;
194| }
195| else if(len > 0 && is_digit(*p)){
196| for(int i = 0; i < len; ++i){
197| char c = p[i];
198| if(is_digit(c))
199| value = value * 10 + c - '0';
200| else if(is_int_suffix(c))
201| break;
202| else
203| return FALSE;
204| }
205|
206| return TRUE;
207| }
208| else
209| return FALSE;
210| }
211|
212| // Reify() doesn't interpret an escape character.
213|
214| bool Lex::Reify(Ptree* t, char*& str)
215| {
216| if(t == nil || !t->IsLeaf())
217| return FALSE;
218|
219| char* p = t->GetPosition();
220| int length = t->GetLength();
221| if(*p != '"')
222| return FALSE;
223| else{
224| str = new(GC) char[length];
225| char* sp = str;
226| for(int i = 1; i < length; ++i)
227| if(p[i] != '"'){
228| *sp++ = p[i];
229| if(p[i] == '\\' && i + 1 < length)
230| *sp++ = p[++i];
231| }
232| else
233| while(++i < length && p[i] != '"')
234| ;
235|
236| *sp = '\0';
237| return TRUE;
238| }
239| }
240|
241| // class TokenFifo
242|
243| Lex::TokenFifo::TokenFifo(Lex* l)
244| {
245| lex = l;
246| size = 16;
247| ring = new (GC) Slot[size];
248| head = tail = 0;
249| }
250|
251| Lex::TokenFifo::~TokenFifo()
252| {
253|
254| }
255|
256| void Lex::TokenFifo::Clear()
257| {
258| head = tail = 0;
259| }
260|
261| void Lex::TokenFifo::Push(int token, char* pos, int len)
262| {
263| const int Plus = 16;
264| ring[head].token = token;
265| ring[head].pos = pos;
266| ring[head].len = len;
267| head = (head + 1) % size;
268| if(head == tail){
269| Slot* ring2 = new (GC) Slot[size + Plus];
270| int i = 0;
271| do{
272| ring2[i++] = ring[tail];
273| tail = (tail + 1) % size;
274| } while(head != tail);
275| head = i;
276| tail = 0;
277| size += Plus;
278| // delete [] ring;
279| ring = ring2;
280| }
281| }
282|
283| int Lex::TokenFifo::Pop(char*& pos, int& len)
284| {
285| if(head == tail)
286| return lex->ReadToken(pos, len);
287|
288| int t = ring[tail].token;
289| pos = ring[tail].pos;
290| len = ring[tail].len;
291| tail = (tail + 1) % size;
292| return t;
293| }
294|
295| int Lex::TokenFifo::Peek(int offset)
296| {
297| return ring[Peek2(offset)].token;
298| }
299|
300| int Lex::TokenFifo::Peek(int offset, char*& pos, int& len)
301| {
302| int cur = Peek2(offset);
303| pos = ring[cur].pos;
304| len = ring[cur].len;
305| return ring[cur].token;
306| }
307|
308| int Lex::TokenFifo::Peek2(int offset)
309| {
310| int i;
311| int cur = tail;
312|
313| for(i = 0; i <= offset; ++i){
314| if(head == cur){
315| while(i++ <= offset){
316| char* p;
317| int l;
318| int t = lex->ReadToken(p, l);
319| Push(t, p, l);
320| }
321|
322| break;
323| }
324|
325| cur = (cur + 1) % size;
326| }
327|
328| return (tail + offset) % size;
329| }
330|
331| /*
332| Lexical Analyzer
333| */
334|
335| int Lex::ReadToken(char*& ptr, int& len)
336| {
337| int t;
338|
339| for(;;){
340| t = ReadLine();
341|
342| if(t == Ignore)
343| continue;
344|
345| last_token = t;
346|
347| #if defined(__GNUG__) || defined(_GNUG_SYNTAX)
348| if(t == ATTRIBUTE){
349| SkipAttributeToken();
350| continue;
351| }
352| else if(t == EXTENSION){
353| t = SkipExtensionToken(ptr, len);
354| if(t == Ignore)
355| continue;
356| else
357| return t;
358| }
359| #endif
360| #if defined(_MSC_VER)
361| if(t == ASM){
362| SkipAsmToken();
363| continue;
364| }
365| else if(t == DECLSPEC){
366| SkipDeclspecToken();
367| continue;
368| }
369| #endif
370| if(t != '\n')
371| break;
372| }
373|
374| ptr = TokenPosition();
375| len = TokenLen();
376| return t;
377| }
378|
379| // SkipAttributeToken() skips __attribute__(...), ___asm__(...), ...
380|
381| void Lex::SkipAttributeToken()
382| {
383| char c;
384|
385| do{
386| c = file->Get();
387| }while(c != '(' && c != '\0');
388|
389| int i = 1;
390| do{
391| c = file->Get();
392| if(c == '(')
393| ++i;
394| else if(c == ')')
395| --i;
396| else if(c == '\0')
397| break;
398| } while(i > 0);
399| }
400|
401| // SkipExtensionToken() skips __extension__(...).
402|
403| int Lex::SkipExtensionToken(char*& ptr, int& len)
404| {
405| ptr = TokenPosition();
406| len = TokenLen();
407|
408| char c;
409|
410| do{
411| c = file->Get();
412| }while(is_blank(c) || c == '\n');
413|
414| if(c != '('){
415| file->Unget();
416| return Ignore;
417| }
418|
419| int i = 1;
420| do{
421| c = file->Get();
422| if(c == '(')
423| ++i;
424| else if(c == ')')
425| --i;
426| else if(c == '\0')
427| break;
428| } while(i > 0);
429|
430| return Identifier;
431| }
432|
433| #if defined(_MSC_VER)
434|
435| #define CHECK_END_OF_INSTRUCTION(C, EOI) \
436| if (C == '\0') return; \
437| if (strchr(EOI, C)) { \
438| this->file->Unget(); \
439| return; \
440| }
441|
442| /* SkipAsmToken() skips __asm ...
443| You can have the following :
444|
445| Just count the '{' and '}' and it should be ok
446| __asm { mov ax,1
447| mov bx,1 }
448|
449| Stop when EOL found. Note that the first ';' after
450| an __asm instruction is an ASM comment !
451| int v; __asm mov ax,1 __asm mov bx,1; v=1;
452|
453| Stop when '}' found
454| if (cond) {__asm mov ax,1 __asm mov bx,1}
455|
456| and certainly more...
457| */
458| void Lex::SkipAsmToken()
459| {
460| char c;
461|
462| do{
463| c = file->Get();
464| CHECK_END_OF_INSTRUCTION(c, "");
465| }while(is_blank(c) || c == '\n');
466|
467| if(c == '{'){
468| int i = 1;
469| do{
470| c = file->Get();
471| CHECK_END_OF_INSTRUCTION(c, "");
472| if(c == '{')
473| ++i;
474| else if(c == '}')
475| --i;
476| } while(i > 0);
477| }
478| else{
479| for(;;){
480| CHECK_END_OF_INSTRUCTION(c, "}\n");
481| c = file->Get();
482| }
483| }
484| }
485|
486| // SkipDeclspecToken() skips __declspec(...).
487|
488| void Lex::SkipDeclspecToken()
489| {
490| char c;
491|
492| do{
493| c = file->Get();
494| CHECK_END_OF_INSTRUCTION(c, "");
495| }while(is_blank(c));
496|
497| if (c == '(') {
498| int i = 1;
499| do{
500| c = file->Get();
501| CHECK_END_OF_INSTRUCTION(c, "};");
502| if(c == '(')
503| ++i;
504| else if(c == ')')
505| --i;
506| }while(i > 0);
507| }
508| }
509|
510| #undef CHECK_END_OF_INSTRUCTION
511|
512| #endif /* _MSC_VER */
513|
514| char Lex::GetNextNonWhiteChar()
515| {
516| char c;
517|
518| for(;;){
519| do{
520| c = file->Get();
521| }while(is_blank(c));
522|
523| if(c != '\\')
524| break;
525|
526| c = file->Get();
527| if(c != '\n' && c!= '\r') {
528| file->Unget();
529| break;
530| }
531| }
532|
533| return c;
534| }
535|
536| int Lex::ReadLine()
537| {
538| char c;
539| uint top;
540|
541| c = GetNextNonWhiteChar();
542|
543| tokenp = top = file->GetCurPos();
544| if(c == '\0'){
545| file->Unget();
546| return '\0';
547| }
548| else if(c == '\n')
549| return '\n';
550| else if(c == '#' && last_token == '\n'){
551| if(ReadLineDirective())
552| return '\n';
553| else{
554| file->Rewind(top + 1);
555| token_len = 1;
556| return SingleCharOp(c);
557| }
558| }
559| else if(c == '\'' || c == '"'){
560| if(c == '\''){
561| if(ReadCharConst(top))
562| return token(CharConst);
563| }
564| else{
565| if(ReadStrConst(top))
566| return token(StringL);
567| }
568|
569| file->Rewind(top + 1);
570| token_len = 1;
571| return SingleCharOp(c);
572| }
573| else if(is_digit(c))
574| return ReadNumber(c, top);
575| else if(c == '.'){
576| c = file->Get();
577| if(is_digit(c))
578| return ReadFloat(top);
579| else{
580| file->Unget();
581| return ReadSeparator('.', top);
582| }
583| }
584| else if(is_letter(c)) {
585| if (c == 'L') {
586| // May be a L"const" type string
587| char next = file->Get();
588| if (next == '"') {
589| if (ReadStrConst(top))
590| return token(StringL);
591| }
592| file->Unget();
593| }
594| return ReadIdentifier(top);
595| } else
596| return ReadSeparator(c, top);
597| }
598|
599| bool Lex::ReadCharConst(uint top)
600| {
601| char c;
602|
603| for(;;){
604| c = file->Get();
605| if(c == '\\'){
606| c = file->Get();
607| if(c == '\0')
608| return FALSE;
609| }
610| else if(c == '\''){
611| token_len = int(file->GetCurPos() - top + 1);
612| return TRUE;
613| }
614| else if(c == '\n' || c == '\0')
615| return FALSE;
616| }
617| }
618|
619| /*
620| If text is a sequence of string constants like:
621| "string1" "string2" L"string3"
622| then the string constants are delt with as a single constant.
623| */
624| bool Lex::ReadStrConst(uint top)
625| {
626| char c;
627|
628| // Skip the L if there is one
629| if (*file->Read(top) == 'L')
630| file->Get();
631|
632| for(;;){
633| c = file->Get();
634| if(c == '\\'){
635| c = file->Get();
636| if(c == '\0')
637| return FALSE;
638| }
639| else if(c == '"'){
640| uint pos = file->GetCurPos() + 1;
641| int nline = 0;
642| do{
643| c = file->Get();
644| if(c == '\n')
645| ++nline;
646| } while(is_blank(c) || c == '\n');
647|
648| if(c == '"')
649| /* line_number += nline; */ ;
650| else{
651| token_len = int(pos - top);
652| file->Rewind(pos);
653| return TRUE;
654| }
655| }
656| else if(c == '\n' || c == '\0')
657| return FALSE;
658| }
659| }
660|
661| int Lex::ReadNumber(char c, uint top)
662| {
663| char c2 = file->Get();
664|
665| if(c == '0' && is_xletter(c2)){
666| do{
667| c = file->Get();
668| } while(is_hexdigit(c));
669| while(is_int_suffix(c))
670| c = file->Get();
671|
672| file->Unget();
673| token_len = int(file->GetCurPos() - top + 1);
674| return token(Constant);
675| }
676|
677| while(is_digit(c2))
678| c2 = file->Get();
679|
680| if(is_int_suffix(c2))
681| do{
682| c2 = file->Get();
683| }while(is_int_suffix(c2));
684| else if(c2 == '.')
685| return ReadFloat(top);
686| else if(is_eletter(c2)){
687| file->Unget();
688| return ReadFloat(top);
689| }
690|
691| file->Unget();
692| token_len = int(file->GetCurPos() - top + 1);
693| return token(Constant);
694| }
695|
696| int Lex::ReadFloat(uint top)
697| {
698| char c;
699|
700| do{
701| c = file->Get();
702| }while(is_digit(c));
703| if(is_float_suffix(c))
704| do{
705| c = file->Get();
706| }while(is_float_suffix(c));
707| else if(is_eletter(c)){
708| uint p = file->GetCurPos();
709| c = file->Get();
710| if(c == '+' || c == '-'){
711| c = file->Get();
712| if(!is_digit(c)){
713| file->Rewind(p);
714| token_len = int(p - top);
715| return token(Constant);
716| }
717| }
718| else if(!is_digit(c)){
719| file->Rewind(p);
720| token_len = int(p - top);
721| return token(Constant);
722| }
723|
724| do{
725| c = file->Get();
726| }while(is_digit(c));
727|
728| while(is_float_suffix(c))
729| c = file->Get();
730| }
731|
732| file->Unget();
733| token_len = int(file->GetCurPos() - top + 1);
734| return token(Constant);
735| }
736|
737| // ReadLineDirective() simply ignores a line beginning with '#'
738|
739| bool Lex::ReadLineDirective()
740| {
741| char c;
742|
743| do{
744| c = file->Get();
745| }while(c != '\n' && c != '\0');
746| return TRUE;
747| }
748|
749| int Lex::ReadIdentifier(uint top)
750| {
751| char c;
752|
753| do{
754| c = file->Get();
755| }while(is_letter(c) || is_digit(c));
756|
757| uint len = file->GetCurPos() - top;
758| token_len = int(len);
759| file->Unget();
760|
761| return Screening((char*)file->Read(top), int(len));
762| }
763|
764|
765|
766|
767|
768| static struct rw_table {
769| char* name;
770| long value;
771| } table[] = {
772| #if defined(__GNUG__) || defined(_GNUG_SYNTAX)
773| { "__alignof__", token(SIZEOF
774| { "__asm__", token(ATTRI
775| { "__attribute__", token(ATTRIBUTE)
776| { "__complex__",token(Ignore) },
777| { "__const", token(C
778| { "__extension__", token(EXTENSION)
779| { "__imag__", token(Ign
780| { "__inline__", token(INLIN
781| { "__real__", token(Ign
782| { "__restrict", token(Ignor
783| { "__restrict__", token(Ignore)
784| { "__signed", token(SIG
785| { "__signed__", token(SIGNE
786| { "__typeof", token(TYP
787| { "__typeof__", token(TYPEO
788| #endif
789| { "asm", token(AT
790| { "auto", toke
791| #if !defined(_MSC_VER) || (_MSC_VER >= 1100)
792| { "bool", token(B
793| #endif
794| { "break", token(
795| { "case", toke
796| { "catch", token(
797| { "char", toke
798| { "class", token(
799| { "const", token(
800| { "continue", token(CONTI
801| { "default", token(DEF
802| { "delete", token(DE
803| { "do",
804| { "double", token(DO
805| { "else", toke
806| { "enum", toke
807| { "extern", token(EX
808| { "float", token(
809| { "for", to
810| { "friend", token(FR
811| { "goto", toke
812| { "if",
813| { "inline", token(IN
814| { "int", to
815| { "long", toke
816| { "metaclass", token(METACLASS) },
817| { "mutable", token(MUT
818| { "namespace", token(NAMESPA
819| { "new", to
820| { "operator", token(OPERA
821| { "private", token(PRI
822| { "protected", token(PROTECT
823| { "public", token(PU
824| { "register", token(REGIS
825| { "return", token(RE
826| { "short", token(
827| { "signed", token(SI
828| { "sizeof", token(SI
829| { "static", token(ST
830| { "struct", token(ST
831| { "switch", token(SW
832| { "template", token(TEMPL
833| { "this", toke
834| { "throw", token(
835| { "try", to
836| { "typedef", token(TYP
837| { "typeid", token(TY
838| { "typename", token(CLASS) },
839| { "union", token(
840| { "unsigned", token(UNSIG
841| { "using", token(
842| { "virtual", token(VIR
843| { "void", toke
844| { "volatile", token(VOLAT
845| { "while", token(
846|
847| };
848|
849| static void InitializeOtherKeywords()
850| {
851| static BOOL done = FALSE;
852|
853| if(done)
854| return;
855| else
856| done = TRUE;
857|
858| if(regularCpp)
859| for(unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); ++i)
860| if(table[i].value == METACLASS){
861| table[i].value = Identifier;
862| break;
863| }
864|
865| #if defined(_MSC_VER)
866| assert(Lex::RecordKeyword("cdecl", Ignore));
867| assert(Lex::RecordKeyword("_cdecl", Ignore));
868| assert(Lex::RecordKeyword("__cdecl", Ignore));
869|
870| assert(Lex::RecordKeyword("_fastcall", Ignore));
871| assert(Lex::RecordKeyword("__fastcall", Ignore));
872|
873| assert(Lex::RecordKeyword("_based", Ignore));
874| assert(Lex::RecordKeyword("__based", Ignore));
875|
876| assert(Lex::RecordKeyword("_asm", ASM));
877| assert(Lex::RecordKeyword("__asm", ASM));
878|
879| assert(Lex::RecordKeyword("_inline", INLINE));
880| assert(Lex::RecordKeyword("__inline", INLINE));
881|
882| assert(Lex::RecordKeyword("_stdcall", Ignore));
883| assert(Lex::RecordKeyword("__stdcall", Ignore));
884|
885| assert(Lex::RecordKeyword("__declspec", DECLSPEC));
886|
887| assert(Lex::RecordKeyword("__int8", CHAR));
888| assert(Lex::RecordKeyword("__int16", SHORT));
889| assert(Lex::RecordKeyword("__int32", INT));
890| assert(Lex::RecordKeyword("__int64", INT64));
891| #endif
892| }
893|
894| int Lex::Screening(char *identifier, int len)
895| {
896| struct rw_table *low, *high, *mid;
897| int c, token;
898|
899| low = table;
900| high = &table[sizeof(table) / sizeof(table[0]) - 1];
901| while(low <= high){
902| mid = low + (high - low) / 2;
903| if((c = strncmp(mid->name, identifier, len)) == 0)
904| if(mid->name[len] == '\0')
905| return mid->value;
906| else
907| high = mid - 1;
908| else if(c < 0)
909| low = mid + 1;
910| else
911| high = mid - 1;
912| }
913|
914| if(user_keywords == nil)
915| user_keywords = new HashTable;
916|
917| if(user_keywords->Lookup(identifier, len, (HashValue*)&token))
918| return token;
919|
920| return token(Identifier);
921| }
922|
923| int Lex::ReadSeparator(char c, uint top)
924| {
925| char c1 = file->Get();
926|
927| token_len = 2;
928| if(c1 == '='){
929| switch(c){
930| case '*' :
931| case '/' :
932| case '%' :
933| case '+' :
934| case '-' :
935| case '&' :
936| case '^' :
937| case '|' :
938| return token(AssignOp);
939| case '=' :
940| case '!' :
941| return token(EqualOp);
942| case '<' :
943| case '>' :
944| return token(RelOp);
945| default :
946| file->Unget();
947| token_len = 1;
948| return SingleCharOp(c);
949| }
950| }
951| else if(c == c1){
952| switch(c){
953| case '<' :
954| case '>' :
955| if(file->Get() != '='){
956| file->Unget();
957| return token(ShiftOp);
958| }
959| else{
960| token_len = 3;
961| return token(AssignOp);
962| }
963| case '|' :
964| return token(LogOrOp);
965| case '&' :
966| return token(LogAndOp);
967| case '+' :
968| case '-' :
969| return token(IncOp);
970| case ':' :
971| return token(Scope);
972| case '.' :
973| if(file->Get() == '.'){
974| token_len = 3;
975| return token(Ellipsis);
976| }
977| else
978| file->Unget();
979| case '/' :
980| return ReadComment(c1, top);
981| default :
982| file->Unget();
983| token_len = 1;
984| return SingleCharOp(c);
985| }
986| }
987| else if(c == '.' && c1 == '*')
988| return token(PmOp);
989| else if(c == '-' && c1 == '>')
990| if(file->Get() == '*'){
991| token_len = 3;
992| return token(PmOp);
993| }
994| else{
995| file->Unget();
996| return token(ArrowOp);
997| }
998| else if(c == '/' && c1 == '*')
999| return ReadComment(c1, top);
1000| else{
1001| file->Unget();
1002| token_len = 1;
1003| return SingleCharOp(c);
1004| }
1005|
1006| std::cerr << "*** An invalid character has been found! ("
1007| << (int)c << ',' << (int)c1 << ")\n";
1008| return token(BadToken);
1009| }
1010|
1011| int Lex::SingleCharOp(unsigned char c)
1012| {
1013| /* !"#$%&'()*+,-./0123456789:;<=>? */
1014| static char valid[] = "x xx xxxxxxxx xxxxxx";
1015|
1016| if('!' <= c && c <= '?' && valid[c - '!'] == 'x')
1017| return c;
1018| else if(c == '[' || c == ']' || c == '^')
1019| return c;
1020| else if('{' <= c && c <= '~')
1021| return c;
1022| else if(c == '#') {
1023| // Skip to end of line
1024| do {
1025| c = file->Get();
1026| }while(c != '\n' && c != '\0');
1027| return Ignore;
1028| } else {
1029| std::cerr << "*** An invalid character has been found! ("<<(char)c<<")"<< std::endl;
1030| return token(BadToken);
1031| }
1032| }
1033|
1034| int Lex::ReadComment(char c, uint top) {
1035| uint len = 0;
1036| if (c == '*') // a nested C-style comment is proh
1037| do {
1038| c = file->Get();
1039| if (c == '*') {
1040| c = file->Get();
1041| if (c == '/') {
1042| len = 1;
1043| break;
1044| }
1045| else
1046| file->Unget();
1047| }
1048| }while(c != '\0');
1049| else /* if (c == '/') */
1050| do {
1051| c = file->Get();
1052| }while(c != '\n' && c != '\0');
1053|
1054| len += file->GetCurPos() - top;
1055| token_len = int(len);
1056| Leaf* node = new Leaf((char*)file->Read(top), int(len));
1057| comments = Ptree::Snoc(comments, node);
1058| return Ignore;
1059| }
1060|
1061| Ptree* Lex::GetComments() {
1062| Ptree* c = comments;
1063| comments = nil;
1064| return c;
1065| }
1066|
1067| Ptree* Lex::GetComments2() {
1068| return comments;
1069| }
1070|
1071| #ifdef TEST
1072| #include <stdio.h>
1073|
1074| main()
1075| {
1076| int i = 0;
1077| Token token;
1078|
1079| Lex lex(new ProgramFromStdin);
1080| for(;;){
1081| // int t = lex.GetToken(t
1082| int t = lex.LookAhead(i++, token);
1083| if(t == 0)
1084| break;
1085| else if(t < 128)
1086| printf("%c (%x): ", t, t);
1087| else
1088| printf("%-10.10s (%x): ", (char*)t, t);
1089|
1090| putchar('"');
1091| while(token.len-- > 0)
1092| putchar(*token.ptr++);
1093|
1094| puts("\"");
1095| };
1096| }
1097| #endif
1098|
1099|
1100|
1101|
1102|
1103|
1104|
1105|
1106|
1107|
1108|
1109|
1110|
1111|
1112|
1113|
1114|
1115|
1116|
1117|
1118|
1119|
1120|
1121|
1122|
1123|
1124|
1125|
1126|
1127|
1128|
1129|
1130|
1131|
1132|
1133|
1134|
1135|
1136|
1137|
1138|
1139|
1140|
1141|
1142|
1143|
1144|
1145|
1146|
1147|