KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jacorb > trading > constraint > Lex


// Copyright (C) 1998-1999
// Object Oriented Concepts, Inc.

// **********************************************************************
//
// Copyright (c) 1997
// Mark Spruiell (mark@intellisoft.com)
//
// See the COPYING file for more information
//
// **********************************************************************

14 package org.jacorb.trading.constraint;
15
16 import java.io.*;
17 import java.util.*;
18
19
/**
 * Lex is the lexical analyzer used to produce tokens from an
 * input source.
 *
 * <p>The complete input is read into memory by the constructor, which
 * also advances to the first token.  Callers then alternate between
 * {@link #getToken()} / {@link #getLexeme()} and {@link #nextToken()}
 * until {@link #END} or {@link #ERROR} is reported.
 */
public class Lex
{
  /** The complete input text, loaded by the constructor. */
  private StringBuffer m_input;
  /** The current token (one of the constants below). */
  private int m_token = ERROR;
  /** Accumulates the characters of the current lexeme. */
  private StringBuffer m_buffer = new StringBuffer();
  /** Cached string form of m_buffer; null until first requested. */
  private String m_lexeme = null;
  /** Index into m_input of the next character to be read. */
  private int m_pos = 0;
  /** Maps reserved words to their token values. */
  private Hashtable<String, Integer> m_literals =
    new Hashtable<String, Integer>();
  /** Set once nextChar() has been asked to read past the end of input. */
  private boolean m_eof = false;


  public static final int ERROR = 0;
  public static final int END = 1; // EOF
  public static final int LPAREN = 2;
  public static final int RPAREN = 3;
  public static final int EXIST = 4;
  public static final int MINUS = 5;
  public static final int NOT = 6;
  public static final int MULT = 7;
  public static final int DIV = 8;
  public static final int PLUS = 9;
  public static final int TILDE = 10;
  public static final int IN = 11;
  public static final int EQUAL = 12;
  public static final int NOT_EQUAL = 13;
  public static final int LESS = 14;
  public static final int LESS_EQUAL = 15;
  public static final int GREATER = 16;
  public static final int GREATER_EQUAL = 17;
  public static final int AND = 18;
  public static final int OR = 19;
  public static final int IDENT = 20;
  public static final int TRUE_LIT = 21; // literal
  public static final int FALSE_LIT = 22; // literal
  public static final int STRING_LIT = 23; // literal
  public static final int NUMBER_LIT = 24; // literal
  public static final int PREF_MIN = 25;
  public static final int PREF_MAX = 26;
  public static final int PREF_WITH = 27;
  public static final int PREF_RANDOM = 28;
  public static final int PREF_FIRST = 29;


  private Lex()
  {
  }


  /**
   * Constructs a new lexical analyzer and advances to the first token.
   *
   * <p>The reader is drained completely but is not closed.  If reading
   * fails with an IOException, the characters read so far are used as
   * the input; the failure is not reported to the caller.
   *
   * @param reader the source of the text to be tokenized
   */
  public Lex(Reader reader)
  {
    BufferedReader br = new BufferedReader(reader);
    m_input = new StringBuffer();

      // read all of the characters into our string buffer
    char[] chars = new char[512];
    try {
      int len;
      while ((len = br.read(chars)) >= 0)
        m_input.append(chars, 0, len);
    }
    catch (IOException e) {
        // Best effort: treat an I/O failure as end of input.  Retrying
        // here would loop forever on a reader that keeps failing.
    }

      // load literals - maps the token to its numeric value
    m_literals.put("TRUE", Integer.valueOf(TRUE_LIT));
    m_literals.put("FALSE", Integer.valueOf(FALSE_LIT));
    m_literals.put("and", Integer.valueOf(AND));
    m_literals.put("not", Integer.valueOf(NOT));
    m_literals.put("or", Integer.valueOf(OR));
    m_literals.put("exist", Integer.valueOf(EXIST));
    m_literals.put("in", Integer.valueOf(IN));
    m_literals.put("min", Integer.valueOf(PREF_MIN));
    m_literals.put("max", Integer.valueOf(PREF_MAX));
    m_literals.put("with", Integer.valueOf(PREF_WITH));
    m_literals.put("random", Integer.valueOf(PREF_RANDOM));
    m_literals.put("first", Integer.valueOf(PREF_FIRST));

      // advance to first token
    nextToken();
  }


  /**
   * Advances to the next token.  Afterwards getToken() reports the
   * kind of token found and getLexeme() its text.  Spaces, tabs and
   * newlines between tokens are skipped.
   */
  public void nextToken()
  {
    int result;

    clearLexeme();

    while (true) {
      char c = nextChar();

        // check for end-of-file
      if (eof()) {
        result = END;
        break;
      }

        // skip whitespace
      if (c == ' ' || c == '\t' || c == '\n')
        continue;

      result = scanToken(c);
      break;
    }

    m_token = result;
  }


  /**
   * Scans one token whose first character, c, has already been read.
   */
  private int scanToken(char c)
  {
    switch (c) {
      case '(':
        return scanSingle(c, LPAREN);
      case ')':
        return scanSingle(c, RPAREN);
      case '-':
        return scanSingle(c, MINUS);
      case '*':
        return scanSingle(c, MULT);
      case '/':
        return scanSingle(c, DIV);
      case '+':
        return scanSingle(c, PLUS);
      case '~':
        return scanSingle(c, TILDE);

      case '=':
        return scanRequiredEquals(c, EQUAL);
      case '!':
        return scanRequiredEquals(c, NOT_EQUAL);

      case '<':
        return scanOptionalEquals(c, LESS, LESS_EQUAL);
      case '>':
        return scanOptionalEquals(c, GREATER, GREATER_EQUAL);

      case '\'':
        return scanString();

      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
        return scanNumber(c);

      default:
          // must be a string, either a literal or an identifier
        return scanWord(c);
    }
  }


  /** Records a one-character token. */
  private int scanSingle(char c, int token)
  {
    addLexeme(c);
    return token;
  }


  /**
   * Scans a two-character operator whose second character must be
   * '=' ('==' or '!=').  If it is not, the peeked character is pushed
   * back, the lexeme is left empty and ERROR is returned.
   */
  private int scanRequiredEquals(char c, int token)
  {
    char peek = nextChar();
    if (peek != '=') {
      pushBack(peek);
      return ERROR;
    }

    addLexeme(c);
    addLexeme(peek);
    return token;
  }


  /**
   * Scans an operator that may be followed by '=' ('<', '<=', '>'
   * or '>=').
   */
  private int scanOptionalEquals(char c, int plain, int withEquals)
  {
    addLexeme(c);

    char peek = nextChar();
    if (peek == '=') {
      addLexeme(peek);
      return withEquals;
    }

      // trailing '=' not found - push back the character
    pushBack(peek);
    return plain;
  }


  /**
   * Scans a quoted string; the opening quote has already been read.
   * The lexeme holds the string's contents without the quotes.  A
   * backslash causes the character that follows it to be taken
   * literally (e.g. an embedded quote).  An unescaped newline, or end
   * of input before the closing quote, is an error.
   */
  private int scanString()
  {
    char c;

      // search until we find the matching quote
    while ((c = nextChar()) != '\'') {
      if (c == '\n')
        return ERROR;

        // allow characters to be escaped (e.g. the quote)
      if (c == '\\')
        c = nextChar(); // skip the escape char

        // if we encounter EOF before the matching quote, it's an error
      if (eof())
        return ERROR;

      addLexeme(c);
    }

    return STRING_LIT;
  }


  /**
   * Scans a number whose first digit, c, has already been read:
   * digits with at most one '.', optionally followed by an exponent
   * ('e' or 'E', an optional sign, and digits).  The last character
   * of the number must be a digit.
   */
  private int scanNumber(char c)
  {
    boolean seenExp = false;
    boolean seenPeriod = false;
    boolean formatError = false;
    boolean done = false;
    char lastChar = 0;

    while (! done) {

      switch (c) {
        case '.':
            // it's an error if we've already seen a '.', or if the
            // '.' would fall inside the exponent
          if (seenPeriod || seenExp) {
            formatError = true;
            done = true;
          }
          else {
            addLexeme(c);
            seenPeriod = true;
          }
          break;

        case 'E':
        case 'e':
            // it's an error if we've already seen a 'E' or if
            // the previous character was not a digit
          if (seenExp || ! Character.isDigit(lastChar)) {
            formatError = true;
            done = true;
          }
          else {
            seenExp = true;
            addLexeme(c);
            c = nextChar();
              // check for +/- on exponent
            if (c == '+' || c == '-')
              addLexeme(c);
            else
              pushBack(c); // re-read below as the next character
          }
          break;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          addLexeme(c);
          break;

        default:
            // anything else terminates the number
          pushBack(c);
          done = true;
          break;
      } // switch (c)

      if (! done) {
        lastChar = c; // remember the last character
        c = nextChar();
        if (eof())
          done = true;
      }
    } // while (! done)

    if (formatError)
      return ERROR;

      // if the last character seen is not a digit, it's an error
    return Character.isDigit(lastChar) ? NUMBER_LIT : ERROR;
  }


  /**
   * Scans a reserved word or an identifier whose first character, c,
   * has already been read.  Returns ERROR if c cannot start one.
   */
  private int scanWord(char c)
  {
      // if the character isn't compatible with the beginning of
      // a literal or identifier, it's an error
    if (c != '_' && ! Character.isLetter(c))
      return ERROR;

    while (isIdent(c) && ! eof()) {
      addLexeme(c);
      c = nextChar();
    }

    if (! eof())
      pushBack(c);

      // see if the lexeme is a literal; if we didn't find a literal,
      // then it must be an identifier
    Integer val = m_literals.get(getLexeme());
    return (val == null) ? IDENT : val.intValue();
  }


  /**
   * Returns the current token
   */
  public int getToken()
  {
    return m_token;
  }


  /**
   * Returns the current lexeme
   */
  public String getLexeme()
  {
    if (m_lexeme == null)
      m_lexeme = m_buffer.toString();

    return m_lexeme;
  }


  /**
   * Returns the current position of the analyzer, i.e. the index in
   * the input of the next character to be read
   */
  public int getPosition()
  {
    return m_pos;
  }


  /**
   * Returns true once nextChar() has been called with no input left
   */
  protected boolean eof()
  {
    return m_eof;
  }


  /**
   * Returns the next input character and advances past it.  At the
   * end of the input, returns 0 and sets the end-of-file flag.
   */
  protected char nextChar()
  {
    char result = 0;

    if (m_pos < m_input.length()) {
      result = m_input.charAt(m_pos);
      m_pos++;
    }
    else
      m_eof = true;

    return result;
  }


  /**
   * Un-reads the character most recently returned by nextChar().
   * Does nothing if c is 0, the value nextChar() returns at the end
   * of the input.
   */
  protected void pushBack(char c)
  {
    if (c != 0)
      m_pos--;
  }


  /**
   * Returns true if c may appear in an identifier (letter, digit
   * or underscore)
   */
  protected boolean isIdent(char c)
  {
    return (Character.isLetter(c) || Character.isDigit(c) || (c == '_'));
  }


  /**
   * Discards the current lexeme
   */
  protected void clearLexeme()
  {
    m_lexeme = null;
    m_buffer.setLength(0);
  }


  /**
   * Appends a character to the current lexeme
   */
  protected void addLexeme(char c)
  {
    m_buffer.append(c);
  }


  /**
   * Debugging driver: tokenizes the expression given as the first
   * command-line argument and prints each token.
   */
  public static void main(String[] args)
  {
    if (args.length < 1) {
      System.err.println("Usage: Lex expr");
      System.exit(1);
    }

    Lex lex = new Lex(new StringReader(args[0]));
    int token = lex.getToken();
    while (token != Lex.END && token != Lex.ERROR) {
      System.out.println("Token = '" + lex.getLexeme() + "' (" +
        tokenName(token) + ")");
      lex.nextToken();
      token = lex.getToken();
    }
    System.out.println("Token = " + tokenName(token));
  }


  /**
   * Returns the symbolic name of a token value, or "&lt;unknown&gt;"
   * if the value is not one of the token constants
   */
  protected static String tokenName(int token)
  {
    switch (token) {
      case ERROR:         return "ERROR";
      case END:           return "END";
      case LPAREN:        return "LPAREN";
      case RPAREN:        return "RPAREN";
      case EXIST:         return "EXIST";
      case MINUS:         return "MINUS";
      case NOT:           return "NOT";
      case MULT:          return "MULT";
      case DIV:           return "DIV";
      case PLUS:          return "PLUS";
      case TILDE:         return "TILDE";
      case IN:            return "IN";
      case EQUAL:         return "EQUAL";
      case NOT_EQUAL:     return "NOT_EQUAL";
      case LESS:          return "LESS";
      case LESS_EQUAL:    return "LESS_EQUAL";
      case GREATER:       return "GREATER";
      case GREATER_EQUAL: return "GREATER_EQUAL";
      case AND:           return "AND";
      case OR:            return "OR";
      case IDENT:         return "IDENT";
      case TRUE_LIT:      return "TRUE_LIT";
      case FALSE_LIT:     return "FALSE_LIT";
      case STRING_LIT:    return "STRING_LIT";
      case NUMBER_LIT:    return "NUMBER_LIT";
      case PREF_MIN:      return "PREF_MIN";
      case PREF_MAX:      return "PREF_MAX";
      case PREF_WITH:     return "PREF_WITH";
      case PREF_RANDOM:   return "PREF_RANDOM";
      case PREF_FIRST:    return "PREF_FIRST";
      default:            return "<unknown>";
    }
  }
}
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
Popular Tags