00001 /******************************************************************************* 00002 00003 @file Token.d 00004 00005 Copyright (C) 2004 Kris Bell 00006 00007 This software is provided 'as-is', without any express or implied 00008 warranty. In no event will the authors be held liable for damages 00009 of any kind arising from the use of this software. 00010 00011 Permission is hereby granted to anyone to use this software for any 00012 purpose, including commercial applications, and to alter it and/or 00013 redistribute it freely, subject to the following restrictions: 00014 00015 1. The origin of this software must not be misrepresented; you must 00016 not claim that you wrote the original software. If you use this 00017 software in a product, an acknowledgment within documentation of 00018 said product would be appreciated but is not required. 00019 00020 2. Altered source versions must be plainly marked as such, and must 00021 not be misrepresented as being the original software. 00022 00023 3. This notice may not be removed or altered from any distribution 00024 of the source. 00025 00026 00027 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00028 00029 00030 @version Initial version, March 2004 00031 Circular dependency split; Oct 2004 00032 00033 @author Kris, Chris Sauls 00034 00035 00036 *******************************************************************************/ 00037 00038 module mango.io.Token; 00039 00040 private import std.ctype; 00041 00042 private import mango.io.Exception; 00043 00044 private import mango.io.model.IWriter; 00045 00046 /******************************************************************************* 00047 00048 This provides backward compatibility by importing the additional 00049 token styles into this namespace. 
00050 00051 *******************************************************************************/ 00052 00053 public import mango.io.TokenEx; 00054 00055 00056 /******************************************************************************* 00057 00058 Tokens used by Tokenizer class. Tokens do not copy their content 00059 so they are quite useful for parsing quantities of data quickly. 00060 Conversely since each token is mapped into an external buffer, 00061 you should be aware that changes to said buffer will impact any 00062 tokens based upon it. You may sidestep this by using the clone() 00063 method, or toString().dup 00064 00065 Tokens can convert their ascii text content into a variety of numeric 00066 formats. Formats currently include int, uint, long, ulong, and real. Each 00067 number may be preceded by whitespace, and an optional '+' or '-' 00068 specifier. Note that real-number format is simplistic in that it 00069 does not support exponential declarations. Note the conversion 00070 methods should probably be moved elsewhere. 00071 00072 Here's a brief example of how to apply Token with Tokenizers: 00073 00074 @code 00075 // open a file for reading 00076 FileConduit fc = new FileConduit ("test.txt"); 00077 00078 // create a buffer for reading the file 00079 IBuffer buffer = fc.createBuffer (); 00080 00081 // create a token for receiving the line 00082 Token token = new Token; 00083 00084 // read file a line at a time. Method next() returns false when no more 00085 // delimiters are found. Note there may be an unterminated line at eof 00086 while (Tokenizers.line.next(buffer, token) || token.getLength()) 00087 Stdout.put(token).cr(); 00088 @endcode 00089 00090 See also BoundToken, ReaderToken, CompositeToken and HybridToken. 
*******************************************************************************/

class Token : IWritable
{
        // caller-defined category for this token; see setType()/getType()
        private int     type;

        // the token text; set() stores the given array without copying it
        private char[]  content;

        // single exception instance, created once in the static constructor
        // and thrown by every numeric conversion failure in this class
        private static TokenException Error;

        // parser states used by parseDigits()
        private enum State {Begin, Number, Fraction};

        // callback invoked by parseDigits() once per decimal digit (0..9)
        alias void delegate(int digit) Accumulator;

        /***********************************************************************

                Scale factors used by toReal(): entry [n-1] is 10^-n, so
                a fraction accumulated from n digits is multiplied by
                divisors[n-1]. The table holds 22 entries, which bounds
                the number of fraction digits toReal() accepts.

        ***********************************************************************/

        private static real divisors[] =
                                [
                                0.1,
                                0.01,
                                0.001,
                                0.0001,
                                0.00001,
                                0.000001,
                                0.0000001,
                                0.00000001,
                                0.000000001,
                                0.0000000001,
                                0.00000000001,
                                0.000000000001,
                                0.0000000000001,
                                0.00000000000001,
                                0.000000000000001,
                                0.0000000000000001,
                                0.00000000000000001,
                                0.000000000000000001,
                                0.0000000000000000001,
                                0.00000000000000000001,
                                0.000000000000000000001,
                                0.0000000000000000000001,
                                ];

        /***********************************************************************

                Construct a static exception instance.

        ***********************************************************************/

        static this()
        {
                Error = new TokenException ("unexpected numeric format");
        }

        /***********************************************************************

                Set the content of this token. The array is referenced,
                not copied. Returns this token to permit call chaining.

        ***********************************************************************/

        Token set (char[] content)
        {
                this.content = content;
                return this;
        }

        /***********************************************************************

                Return the length of this token.

        ***********************************************************************/

        int getLength ()
        {
                return content.length;
        }

        /***********************************************************************

                Set the type of this token. Token types can be useful when
                one wishes to categorize input patterns. Returns this
                token to permit call chaining.

        ***********************************************************************/

        Token setType (int type)
        {
                this.type = type;
                return this;
        }

        /***********************************************************************

                Return the type associated with this token. See setType().

        ***********************************************************************/

        int getType ()
        {
                return type;
        }

        /***********************************************************************

                Convert this token to an integer.

                Throws the shared TokenException when parseDigits()
                rejects the content. No overflow checking is performed
                while accumulating digits.

        ***********************************************************************/

        int toInt ()
        {
                int i = 0;

                void multiply (int digit)
                {
                        i = i * 10 + digit;
                }

                return parseDigits (&multiply, null) ? -i : i;
        }

        /***********************************************************************

                Convert this token to an unsigned integer.

                Throws the shared TokenException when parseDigits()
                rejects the content, or when the content carries a '-'
                sign. No overflow checking is performed.

        ***********************************************************************/

        uint toUInt ()
        {
                uint i = 0;

                void multiply (int digit)
                {
                        i = i * 10 + digit;
                }

                // a leading '-' is not acceptable for an unsigned result
                if (parseDigits (&multiply, null))
                    throw Error;
                return i;
        }

        /***********************************************************************

                Convert this token to a long integer.

                Throws the shared TokenException when parseDigits()
                rejects the content. No overflow checking is performed.

        ***********************************************************************/

        long toLong ()
        {
                long l = 0;

                void multiply (int digit)
                {
                        l = l * 10 + digit;
                }

                return parseDigits (&multiply, null) ? -l : l;
        }

        /***********************************************************************

                Convert this token to an unsigned long integer.

                Throws the shared TokenException when parseDigits()
                rejects the content, or when the content carries a '-'
                sign. No overflow checking is performed.

        ***********************************************************************/

        ulong toULong ()
        {
                ulong l = 0;

                void multiply (int digit)
                {
                        l = l * 10 + digit;
                }

                // a leading '-' is not acceptable for an unsigned result
                if (parseDigits (&multiply, null))
                    throw Error;
                return l;
        }

        /***********************************************************************

                Convert this token to a real.

                Digits before the '.' are accumulated into the whole
                part; digits after it are accumulated separately, counted,
                and then scaled via the divisors table. Exponents are
                not supported.

                Throws the shared TokenException when parseDigits()
                rejects the content, or when there are more fraction
                digits than the divisors table has entries.

        ***********************************************************************/

        real toReal ()
        {
                real r = 0,
                     f = 0;
                int  divisor = 0;

                void whole (int c)
                {
                        r = r * 10.0 + c;
                }

                void fraction (int c)
                {
                        f = f * 10.0 + c;
                        ++divisor;
                }

                bool neg = parseDigits (&whole, &fraction);

                if (divisor)
                   {
                   // divisor is the count of fraction digits, and the
                   // table is indexed with divisor-1; hence every count
                   // from 1 through divisors.length has a valid entry
                   if (divisor <= divisors.length)
                       r += f * divisors [divisor-1];
                   else
                      throw Error;
                   }

                return neg ? -r : r;
        }

        /***********************************************************************

                Clone this token, making a copy of the content also.
                The type is carried over to the new token.

        ***********************************************************************/

        Token clone ()
        {
                Token clone = new Token;

                clone.set (toString (false));
                clone.type = type;
                return clone;
        }

        /***********************************************************************

                Return a reference to this tokens content. Duplicate it
                only if 'slice' is explicitly set to false (defaults to
                a slice instead).

        ***********************************************************************/

        char[] toString (bool slice = true)
        {
                if (slice)
                    return content;
                return content.dup;
        }

        /***********************************************************************

                Is this token equal to another?

                When the argument is a Token, only the content of the
                two tokens is compared; otherwise the decision is left
                to the superclass.

        ***********************************************************************/

        override int opEquals (Object o)
        {
                Token other = cast(Token) o;

                if (other is null)
                    return super.opEquals (o);
                return typeid(char[]).equals (&content, &other.content);
                //content.typeinfo.equals (content, other.content);
        }

        /***********************************************************************

                Compare this token to another.

                When the argument is a Token, only the content of the
                two tokens is compared; otherwise the decision is left
                to the superclass.

        ***********************************************************************/

        override int opCmp (Object o)
        {
                Token other = cast(Token) o;

                if (other is null)
                    return super.opCmp (o);

                return typeid(char[]).compare (&content, &other.content);
                //content.typeinfo.compare (content, other.content);
        }

        /***********************************************************************

                Hash this token, based upon its content.

        ***********************************************************************/

        override uint toHash ()
        {
                return typeid(char[]).getHash (&content);
                // content.typeinfo.getHash (content);
        }

        /***********************************************************************

                Make the Token class compatible with IWriter instances.
                Emits the token content to the given writer.

        ***********************************************************************/

        void write (IWriter w)
        {
                w.put (content);
        }

        /***********************************************************************

                Parse a set of digits, optionally prefixed by a '+' or '-',
                and optionally with a '.' contained within. Delegates come
                in quite handy here, and add relatively little overhead.

                Leading whitespace is skipped. Each digit is handed to
                'dg' until a '.' is seen; from then on digits are handed
                to 'fraction'. When 'fraction' is null, a '.' is treated
                like any other non-digit. Any non-digit after the optional
                sign (including trailing whitespace or a second '.')
                causes the shared TokenException to be thrown.

                Content with no digits at all (empty, whitespace only,
                or just a sign) is accepted without invoking a delegate.

                Returns true when the content was prefixed with '-'.

        ***********************************************************************/

        private bool parseDigits (Accumulator dg, Accumulator fraction)
        {
                bool    neg = false;
                State   state = State.Begin;

                // note that 'break' below leaves the switch only, thus
                // moving on to the next character of the content
                foreach (char c; content)
                         switch (state)
                                {
                                case State.Begin:
                                     if (isspace (c))
                                         break;

                                     // first non-space character: consume
                                     // an optional sign, else treat it as
                                     // the first character of the number
                                     state = State.Number;
                                     if (c == '-')
                                        {
                                        neg = true;
                                        break;
                                        }
                                     else
                                        if (c == '+')
                                            break;
                                     // fall thru!

                                case State.Number:
                                     if (c == '.' && fraction)
                                        {
                                        // redirect subsequent digits
                                        state = State.Fraction;
                                        dg = fraction;
                                        break;
                                        }
                                     // fall thru!

                                case State.Fraction:
                                     if (isdigit (c))
                                         dg (c - '0');
                                     else
                                        throw Error;
                                     break;

                                default:
                                     break;
                                }
                return neg;
        }
}