Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

Token.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Token.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032 
00033         @version        Initial version, March 2004      
00034                         Circular dependency split; Oct 2004  
00035 
00036         @author         Kris, Chris Sauls
00037 
00038 
00039 *******************************************************************************/
00040 
00041 module mango.io.Token;
00042 
00043 private import  std.ctype;
00044 
00045 private import  mango.io.Exception;
00046 
00047 private import  mango.io.model.IWriter;
00048 
00049 /*******************************************************************************
00050 
00051         This provides backward compatibility by importing the additional
00052         token styles into this namespace.
00053 
00054 *******************************************************************************/
00055 
00056 public  import  mango.io.TokenEx;
00057 
00058 
00059 /*******************************************************************************
00060 
00061         Tokens used by Tokenizer class. Tokens do not copy their content
00062         so they are quite useful for parsing quantities of data quickly. 
00063         Conversely since each token is mapped into an external buffer, 
00064         you should be aware that changes to said buffer will impact any
00065         tokens based upon it. You may sidestep this by using the clone()
00066         method, or toString().dup
00067 
00068         Tokens can convert ascii text into a variety of numeric formats.
00069         Formats currently include int, uint, long, ulong, and real. Each
00070         number may be preceded by whitespace, and an optional '+' or '-'
00071         specifier. Note that real-number format is simplistic in that it 
00072         does not support exponential declarations.  Note the conversion 
00073         methods should probably be moved elsewhere.
00074 
00075         Here's a brief example of how to apply Token with Tokenizers:
00076 
00077         @code
00078         // open a file for reading
00079         FileConduit fc = new FileConduit ("test.txt");
00080 
00081         // create a buffer for reading the file
00082         IBuffer buffer = fc.createBuffer ();
00083 
00084         // create a token for receiving the line
00085         Token token = new Token;
00086 
00087         // read file a line at a time. Method next() returns false when no more 
00088         // delimiters are found. Note there may be an unterminated line at eof
00089         while (Tokenizers.line.next(buffer, token) || token.getLength())
00090                Stdout.put(token).cr();
00091         @endcode
00092 
00093         See also BoundToken, ReaderToken, CompositeToken and HybridToken.
00094 
00095 *******************************************************************************/
00096 
00097 class Token : IWritable
00098 { 
00099         private int     type;
00100         private char[]  content;
00101 
00102         private static TokenException Error;
00103 
00104         private enum State {Begin, Number, Fraction};
00105 
00106         alias void delegate(int digit) Accumulator;
00107 
00108         /***********************************************************************
00109                 Scale factors used by toReal(): divisors[n-1] holds 10^-n.
00110         ***********************************************************************/
00111 
00112         private static  real divisors[] = 
00113                         [
00114                         0.1, 
00115                         0.01, 
00116                         0.001, 
00117                         0.0001, 
00118                         0.00001, 
00119                         0.000001, 
00120                         0.0000001, 
00121                         0.00000001, 
00122                         0.000000001, 
00123                         0.0000000001, 
00124                         0.00000000001, 
00125                         0.000000000001, 
00126                         0.0000000000001, 
00127                         0.00000000000001, 
00128                         0.000000000000001, 
00129                         0.0000000000000001, 
00130                         0.00000000000000001,
00131                         0.000000000000000001,
00132                         0.0000000000000000001,
00133                         0.00000000000000000001,
00134                         0.000000000000000000001,
00135                         0.0000000000000000000001,
00136                         ];
00137 
00138         /***********************************************************************
00139         
00140                 Construct the static exception instance that every failed
00141                 numeric conversion in this class throws.
00142         ***********************************************************************/
00143 
00144         static this()
00145         {
00146                 Error = new TokenException ("unexpected numeric format");
00147         }
00148 
00149         /***********************************************************************
00150         
00151                 Set the content of this token. The array is referenced, not
00152                 copied. Returns this token so that calls may be chained.
00153         ***********************************************************************/
00154 
00155         Token set (char[] content)
00156         {
00157                 this.content = content;
00158                 return this;
00159         }
00160 
00161         /***********************************************************************
00162                 
00163                 Return the length of this token.
00164 
00165         ***********************************************************************/
00166 
00167         int getLength ()
00168         {
00169                 return content.length;
00170         }
00171 
00172         /***********************************************************************
00173         
00174                 Set the type of this token. Token types can be useful when
00175                 one wishes to categorize input patterns. Returns this token.
00176 
00177         ***********************************************************************/
00178 
00179         Token setType (int type)
00180         {
00181                 this.type = type;
00182                 return this;
00183         }
00184 
00185         /***********************************************************************
00186         
00187                 Return the type associated with this token. See setType().
00188 
00189         ***********************************************************************/
00190 
00191         int getType ()
00192         {
00193                 return type;
00194         }
00195 
00196         /***********************************************************************
00197         
00198                 Convert this token to an integer. Overflow is not detected.
00199                 Throws the shared TokenException on malformed content.
00200         ***********************************************************************/
00201 
00202         int toInt ()
00203         {
00204                 int i = 0;
00205 
00206                 void multiply (int digit)
00207                 {
00208                         i = i * 10 + digit;
00209                 }
00210 
00211                 return parseDigits (&multiply, null) ? -i : i;
00212         }
00213 
00214         /***********************************************************************
00215         
00216                 Convert this token to an unsigned integer. Throws the shared
00217                 TokenException on malformed content or a leading '-' sign.
00218         ***********************************************************************/
00219 
00220         uint toUInt ()
00221         {
00222                 uint i = 0;
00223 
00224                 void multiply (int digit)
00225                 {
00226                         i = i * 10 + digit;
00227                 }
00228 
00229                 if (parseDigits (&multiply, null))
00230                     throw Error;
00231                 return i;
00232         }
00233 
00234         /***********************************************************************
00235         
00236                 Convert this token to a long integer. Overflow is not
00237                 detected. Throws the shared TokenException on malformed content.
00238         ***********************************************************************/
00239 
00240         long toLong ()
00241         {
00242                 long l = 0;
00243 
00244                 void multiply (int digit)
00245                 {
00246                         l = l * 10 + digit;
00247                 }
00248 
00249                 return parseDigits (&multiply, null) ? -l : l;
00250         }
00251 
00252         /***********************************************************************
00253         
00254                 Convert this token to an unsigned long integer. Throws the
00255                 shared TokenException on malformed content or a leading '-'.
00256         ***********************************************************************/
00257 
00258         ulong toULong ()
00259         {
00260                 ulong l = 0;
00261 
00262                 void multiply (int digit)
00263                 {
00264                         l = l * 10 + digit;
00265                 }
00266 
00267                 if (parseDigits (&multiply, null))
00268                     throw Error;
00269                 return l;
00270         }
00271 
00272         /***********************************************************************
00273 
00274                 Convert this token to a real. Exponents are not handled, and
00275                 the shared TokenException is thrown on malformed content or
00276                 when the fraction has more digits than the divisors table.
00277         ***********************************************************************/
00278         real toReal ()
00279         {
00280                 real    r = 0,
00281                         f = 0;
00282                 int     divisor = 0;
00283 
00284                 void whole (int c)
00285                 {
00286                         r = r * 10.0 + c;
00287                 }
00288 
00289                 void fraction (int c)
00290                 {                        
00291                         f = f * 10.0 + c;
00292                         ++divisor;
00293                 }
00294 
00295                 bool neg = parseDigits (&whole, &fraction);
00296                 // n fraction digits scale by divisors[n-1], so n == length is valid
00297                 if (divisor)
00298                     if (divisor <= divisors.length)
00299                         r += f * divisors [divisor-1];
00300                     else
00301                        throw Error;
00302 
00303                 return neg ? -r : r;
00304         }
00305 
00306         /***********************************************************************
00307         
00308                 Clone this token, making a copy of the content also.
00309 
00310         ***********************************************************************/
00311 
00312         Token clone ()
00313         {
00314                 Token clone = new Token;
00315 
00316                 clone.set (toString (false));
00317                 clone.type = type;
00318                 return clone;
00319         }
00320 
00321         /***********************************************************************
00322         
00323                 Return a reference to this token's content. Duplicate it
00324                 only if 'slice' is explicitly set to false (defaults to 
00325                 a slice instead).
00326 
00327         ***********************************************************************/
00328 
00329         char[] toString (bool slice = true)
00330         {
00331                 if (slice)
00332                     return content;
00333                 return content.dup;
00334         }
00335 
00336         /***********************************************************************
00337         
00338                 Is this token equal to another? Content is compared; if 'o'
00339                 is not a Token the decision is left to the superclass.
00340         ***********************************************************************/
00341 
00342         override int opEquals (Object o)
00343         {
00344                 Token other = cast(Token) o;
00345 
00346                 if (other is null)
00347                     return super.opEquals (o);
00348                 return typeid(char[]).equals (&content, &other.content);
00349                 //content.typeinfo.equals (content, other.content);
00350         }
00351 
00352         /***********************************************************************
00353         
00354                 Compare this token to another. Content is compared; if 'o'
00355                 is not a Token the decision is left to the superclass.
00356         ***********************************************************************/
00357 
00358         override int opCmp (Object o)
00359         {
00360                 Token other = cast(Token) o;
00361 
00362                 if (other is null)
00363                     return super.opCmp (o);
00364 
00365                 return typeid(char[]).compare (&content, &other.content);
00366                 //content.typeinfo.compare (content, other.content);
00367         }
00368 
00369         /***********************************************************************
00370         
00371                 Hash this token, based upon its content.
00372 
00373         ***********************************************************************/
00374 
00375         override uint toHash ()
00376         {
00377                 return typeid(char[]).getHash (&content);
00378                 // content.typeinfo.getHash (content);
00379         }
00380 
00381         /***********************************************************************
00382         
00383                 Make the Token class compatible with IWriter instances.
00384 
00385         ***********************************************************************/
00386 
00387         void write (IWriter w)
00388         {
00389                 w.put (content);
00390         }
00391 
00392         /***********************************************************************
00393                 Parse a set of digits, optionally prefixed by a '+' or '-', 
00394                 and optionally with a '.' contained within (accepted only if
00395                 'fraction' is non-null). Delegates receive each digit value.
00396                 Leading whitespace is skipped. Returns true when a leading
00397                 '-' was seen; throws on any misplaced or non-digit character.
00398         ***********************************************************************/
00399 
00400         private bool parseDigits (Accumulator dg, Accumulator fraction)
00401         {
00402                 bool            neg = false;
00403                 State           state = State.Begin; 
00404 
00405                 foreach (char c; content)
00406                          switch (state)   
00407                                 {
00408                                 case State.Begin:
00409                                      if (isspace (c))
00410                                          break;
00411 
00412                                      state = State.Number;
00413                                      if (c == '-')
00414                                         {
00415                                         neg = true;
00416                                         break;
00417                                         }
00418                                      else
00419                                         if (c == '+')
00420                                             break;
00421                                      // fall thru!
00422   
00423                                 case State.Number:
00424                                      if (c == '.' && fraction)
00425                                         {
00426                                         state = State.Fraction;
00427                                         dg = fraction;
00428                                         break;
00429                                         }
00430                                      // fall thru!
00431 
00432                                 case State.Fraction:
00433                                      if (isdigit (c))
00434                                          dg (c - '0');
00435                                      else
00436                                         throw Error;
00437                                      break;
00438 
00439                                 default:
00440                                 }
00441                 return neg;
00442         }
00443 }

Generated on Tue Jan 25 21:18:23 2005 for Mango by doxygen 1.3.6