Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

Token.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Token.d
00004         
00005         Copyright (C) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026 
00027                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00028 
00029 
00030         @version        Initial version, March 2004      
00031                         Circular dependency split; Oct 2004  
00032 
00033         @author         Kris, Chris Sauls
00034 
00035 
00036 *******************************************************************************/
00037 
00038 module mango.io.Token;
00039 
00040 private import  std.ctype;
00041 
00042 private import  mango.io.Exception;
00043 
00044 private import  mango.io.model.IWriter;
00045 
00046 /*******************************************************************************
00047 
00048         This provides backward compatibility by importing the additional
00049         token styles into this namespace.
00050 
00051 *******************************************************************************/
00052 
00053 public  import  mango.io.TokenEx;
00054 
00055 
00056 /*******************************************************************************
00057 
00058         Tokens used by Tokenizer class. Tokens do not copy their content
00059         so they are quite useful for parsing quantities of data quickly. 
00060         Conversely since each token is mapped into an external buffer, 
00061         you should be aware that changes to said buffer will impact any
00062         tokens based upon it. You may sidestep this by using the clone()
00063         method, or toString().dup
00064 
00065         Tokens can convert their ascii text into a variety of numeric types.
00066         Formats currently include int, uint, long, ulong, and real. Each
00067         number may be preceded by whitespace, and an optional '+' or '-'
00068         specifier. Note that real-number format is simplistic in that it 
00069         does not support exponential declarations.  Note the conversion 
00070         methods should probably be moved elsewhere.
00071 
00072         Here's a brief example of how to apply Token with Tokenizers:
00073 
00074         @code
00075         // open a file for reading
00076         FileConduit fc = new FileConduit ("test.txt");
00077 
00078         // create a buffer for reading the file
00079         IBuffer buffer = fc.createBuffer ();
00080 
00081         // create a token for receiving the line
00082         Token token = new Token;
00083 
00084         // read file a line at a time. Method next() returns false when no more 
00085         // delimiters are found. Note there may be an unterminated line at eof
00086         while (Tokenizers.line.next(buffer, token) || token.getLength())
00087                Stdout.put(token).cr();
00088         @endcode
00089 
00090         See also BoundToken, ReaderToken, CompositeToken and HybridToken.
00091 
00092 *******************************************************************************/
00093 
00094 class Token : IWritable
00095 { 
00096         private int     type;           // caller-defined category, see setType()
00097         private char[]  content;        // referenced, never copied, by set()
00098 
00099         private static TokenException Error;
00100 
00101         private enum State {Begin, Number, Fraction};
00102 
00103         alias void delegate(int digit) Accumulator;
00104 
00105         /***********************************************************************
00106                 Scale factors used by toReal(): divisors[n-1] is 10^-n.
00107         ***********************************************************************/
00108 
00109         private static  real divisors[] = 
00110                         [
00111                         0.1, 
00112                         0.01, 
00113                         0.001, 
00114                         0.0001, 
00115                         0.00001, 
00116                         0.000001, 
00117                         0.0000001, 
00118                         0.00000001, 
00119                         0.000000001, 
00120                         0.0000000001, 
00121                         0.00000000001, 
00122                         0.000000000001, 
00123                         0.0000000000001, 
00124                         0.00000000000001, 
00125                         0.000000000000001, 
00126                         0.0000000000000001, 
00127                         0.00000000000000001,
00128                         0.000000000000000001,
00129                         0.0000000000000000001,
00130                         0.00000000000000000001,
00131                         0.000000000000000000001,
00132                         0.0000000000000000000001,
00133                         ];
00134 
00135         /***********************************************************************
00136         
00137                 Construct the static exception instance that every failed
00138                 numeric conversion in this class throws.
00139         ***********************************************************************/
00140 
00141         static this()
00142         {
00143                 Error = new TokenException ("unexpected numeric format");
00144         }
00145 
00146         /***********************************************************************
00147         
00148                 Set the content of this token. The array is referenced, not
00149                 copied. Returns this token so that calls may be chained.
00150         ***********************************************************************/
00151 
00152         Token set (char[] content)
00153         {
00154                 this.content = content;
00155                 return this;
00156         }
00157 
00158         /***********************************************************************
00159                 
00160                 Return the length of this token's content.
00161 
00162         ***********************************************************************/
00163 
00164         int getLength ()
00165         {
00166                 return content.length;
00167         }
00168 
00169         /***********************************************************************
00170         
00171                 Set the type of this token. Token types can be useful when
00172                 one wishes to categorize input patterns. Returns this token.
00173 
00174         ***********************************************************************/
00175 
00176         Token setType (int type)
00177         {
00178                 this.type = type;
00179                 return this;
00180         }
00181 
00182         /***********************************************************************
00183         
00184                 Return the type associated with this token. See setType().
00185 
00186         ***********************************************************************/
00187 
00188         int getType ()
00189         {
00190                 return type;
00191         }
00192 
00193         /***********************************************************************
00194                 Convert this token to an integer. A leading '-' negates the
00195                 result. Throws TokenException on malformed content; numeric
00196                 overflow is not detected.
00197         ***********************************************************************/
00198 
00199         int toInt ()
00200         {
00201                 int i = 0;
00202 
00203                 void multiply (int digit)
00204                 {
00205                         i = i * 10 + digit;
00206                 }
00207 
00208                 return parseDigits (&multiply, null) ? -i : i;
00209         }
00210 
00211         /***********************************************************************
00212                 Convert this token to an unsigned integer. Throws
00213                 TokenException on malformed content or when a '-' sign is
00214                 present; numeric overflow is not detected.
00215         ***********************************************************************/
00216 
00217         uint toUInt ()
00218         {
00219                 uint i = 0;
00220 
00221                 void multiply (int digit)
00222                 {
00223                         i = i * 10 + digit;
00224                 }
00225 
00226                 if (parseDigits (&multiply, null))
00227                     throw Error;
00228                 return i;
00229         }
00230 
00231         /***********************************************************************
00232                 Convert this token to a long integer. A leading '-' negates
00233                 the result. Throws TokenException on malformed content;
00234                 numeric overflow is not detected.
00235         ***********************************************************************/
00236 
00237         long toLong ()
00238         {
00239                 long l = 0;
00240 
00241                 void multiply (int digit)
00242                 {
00243                         l = l * 10 + digit;
00244                 }
00245 
00246                 return parseDigits (&multiply, null) ? -l : l;
00247         }
00248 
00249         /***********************************************************************
00250                 Convert this token to an unsigned long integer. Throws
00251                 TokenException on malformed content or when a '-' sign is
00252                 present; numeric overflow is not detected.
00253         ***********************************************************************/
00254 
00255         ulong toULong ()
00256         {
00257                 ulong l = 0;
00258 
00259                 void multiply (int digit)
00260                 {
00261                         l = l * 10 + digit;
00262                 }
00263 
00264                 if (parseDigits (&multiply, null))
00265                     throw Error;
00266                 return l;
00267         }
00268 
00269         /***********************************************************************
00270                 Convert this token to a real. Exponents are not supported.
00271                 Throws TokenException on malformed content, or when there
00272                 are more fraction digits than divisors[] has entries.
00273         ***********************************************************************/
00274 
00275         real toReal ()
00276         {
00277                 real    r = 0,
00278                         f = 0;
00279                 int     divisor = 0;
00280 
00281                 void whole (int c)
00282                 {
00283                         r = r * 10.0 + c;
00284                 }
00285 
00286                 void fraction (int c)
00287                 {                        
00288                         f = f * 10.0 + c;
00289                         ++divisor;
00290                 }
00291 
00292                 bool neg = parseDigits (&whole, &fraction);
00293                 // n fraction digits scale by divisors[n-1], so n may equal divisors.length
00294                 if (divisor)
00295                     if (divisor <= divisors.length)
00296                         r += f * divisors [divisor-1];
00297                     else
00298                        throw Error;
00299 
00300                 return neg ? -r : r;
00301         }
00302 
00303         /***********************************************************************
00304         
00305                 Clone this token, making a copy of the content also. The
00306                 token type is carried over to the clone.
00307         ***********************************************************************/
00308 
00309         Token clone ()
00310         {
00311                 Token clone = new Token;
00312 
00313                 clone.set (toString (false));
00314                 clone.type = type;
00315                 return clone;
00316         }
00317 
00318         /***********************************************************************
00319         
00320                 Return a reference to this token's content. Duplicate it
00321                 only if 'slice' is explicitly set to false (defaults to 
00322                 a slice instead).
00323 
00324         ***********************************************************************/
00325 
00326         char[] toString (bool slice = true)
00327         {
00328                 if (slice)
00329                     return content;
00330                 return content.dup;
00331         }
00332 
00333         /***********************************************************************
00334         
00335                 Is this token equal to another? Only the content is compared;
00336                 a non-Token argument is deferred to Object.opEquals().
00337         ***********************************************************************/
00338 
00339         override int opEquals (Object o)
00340         {
00341                 Token other = cast(Token) o;
00342 
00343                 if (other is null)
00344                     return super.opEquals (o);
00345                 return typeid(char[]).equals (&content, &other.content);
00346                 //content.typeinfo.equals (content, other.content);
00347         }
00348 
00349         /***********************************************************************
00350         
00351                 Compare this token to another. Only the content is compared;
00352                 a non-Token argument is deferred to Object.opCmp().
00353         ***********************************************************************/
00354 
00355         override int opCmp (Object o)
00356         {
00357                 Token other = cast(Token) o;
00358 
00359                 if (other is null)
00360                     return super.opCmp (o);
00361 
00362                 return typeid(char[]).compare (&content, &other.content);
00363                 //content.typeinfo.compare (content, other.content);
00364         }
00365 
00366         /***********************************************************************
00367         
00368                 Hash this token; the hash is computed from the content only.
00369 
00370         ***********************************************************************/
00371 
00372         override uint toHash ()
00373         {
00374                 return typeid(char[]).getHash (&content);
00375                 // content.typeinfo.getHash (content);
00376         }
00377 
00378         /***********************************************************************
00379         
00380                 Make the Token class compatible with IWriter instances by
00381                 emitting the content through the writer's put() method.
00382         ***********************************************************************/
00383 
00384         void write (IWriter w)
00385         {
00386                 w.put (content);
00387         }
00388 
00389         /***********************************************************************
00390                 Parse a set of digits, optionally prefixed by whitespace and
00391                 a '+' or '-', feeding each digit to 'dg'. A '.' is accepted
00392                 only when 'fraction' is non-null; later digits then go to it.
00393                 Returns true if a '-' was seen; throws TokenException on any
00394                 other character. Empty content yields no digits and no error.
00395         ***********************************************************************/
00396 
00397         private bool parseDigits (Accumulator dg, Accumulator fraction)
00398         {
00399                 bool            neg = false;
00400                 State           state = State.Begin; 
00401 
00402                 foreach (char c; content)
00403                          switch (state)   
00404                                 {
00405                                 case State.Begin:
00406                                      if (isspace (c))
00407                                          break;
00408 
00409                                      state = State.Number;
00410                                      if (c == '-')
00411                                         {
00412                                         neg = true;
00413                                         break;
00414                                         }
00415                                      else
00416                                         if (c == '+')
00417                                             break;
00418                                      // fall thru!
00419   
00420                                 case State.Number:
00421                                      if (c == '.' && fraction)
00422                                         {
00423                                         state = State.Fraction;
00424                                         dg = fraction;
00425                                         break;
00426                                         }
00427                                      // fall thru!
00428 
00429                                 case State.Fraction:
00430                                      if (isdigit (c))
00431                                          dg (c - '0');
00432                                      else
00433                                         throw Error;
00434                                      break;
00435 
00436                                 default:
00437                                 }
00438                 return neg;
00439         }
00440 }

Generated on Sun Nov 7 19:06:53 2004 for Mango by doxygen 1.3.6