/*******************************************************************************

        @file Token.d

        Copyright (c) 2004 Kris Bell

        This software is provided 'as-is', without any express or implied
        warranty. In no event will the authors be held liable for damages
        of any kind arising from the use of this software.

        Permission is hereby granted to anyone to use this software for any
        purpose, including commercial applications, and to alter it and/or
        redistribute it freely, subject to the following restrictions:

        1. The origin of this software must not be misrepresented; you must
           not claim that you wrote the original software. If you use this
           software in a product, an acknowledgment within documentation of
           said product would be appreciated but is not required.

        2. Altered source versions must be plainly marked as such, and must
           not be misrepresented as being the original software.

        3. This notice may not be removed or altered from any distribution
           of the source.

        4. Derivative works are permitted, but they must carry this notice
           in full and credit the original source.


                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


        @version        Initial version, March 2004
                        Circular dependency split; Oct 2004

        @author         Kris, Chris Sauls


*******************************************************************************/

module mango.io.Token;

private import std.ctype;

private import mango.io.Exception;

private import mango.io.model.IWriter;

/*******************************************************************************

        This provides backward compatibility by importing the additional
        token styles into this namespace.

*******************************************************************************/

public import mango.io.TokenEx;


/*******************************************************************************

        Tokens used by Tokenizer class. Tokens do not copy their content
        so they are quite useful for parsing quantities of data quickly.
        Conversely since each token is mapped into an external buffer,
        you should be aware that changes to said buffer will impact any
        tokens based upon it. You may sidestep this by using the clone()
        method, or toString().dup

        Tokens can convert their ascii text content into a variety of
        numeric formats. Formats currently include int, uint, long, ulong,
        and real. Each number may be preceded by whitespace, and an
        optional '+' or '-' specifier. Note that real-number format is
        simplistic in that it does not support exponential declarations.
        Note the conversion methods should probably be moved elsewhere.

        Here's a brief example of how to apply Token with Tokenizers:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a buffer for reading the file
        IBuffer buffer = fc.createBuffer ();

        // create a token for receiving the line
        Token token = new Token;

        // read file a line at a time. Method next() returns false when no more
        // delimiters are found. Note there may be an unterminated line at eof
        while (Tokenizers.line.next(buffer, token) || token.getLength())
               Stdout.put(token).cr();
        @endcode

        See also BoundToken, ReaderToken, CompositeToken and HybridToken.

*******************************************************************************/

class Token : IWritable
{
        private int     type;
        private char[]  content;

        // single shared instance, thrown by every numeric conversion
        private static TokenException Error;

        // parser states used by parseDigits()
        private enum State {Begin, Number, Fraction}

        // callback receiving one decimal digit (0..9) at a time
        alias void delegate(int digit) Accumulator;

        /***********************************************************************

                Scale factors for the fractional part of a real number:
                divisors[n-1] is 10^-n, i.e. the multiplier that turns
                an n-digit fraction accumulated as a whole number back
                into its fractional value. The table length is therefore
                the maximum number of fraction digits toReal() accepts.

        ***********************************************************************/

        private static real[] divisors =
        [
                0.1,
                0.01,
                0.001,
                0.0001,
                0.00001,
                0.000001,
                0.0000001,
                0.00000001,
                0.000000001,
                0.0000000001,
                0.00000000001,
                0.000000000001,
                0.0000000000001,
                0.00000000000001,
                0.000000000000001,
                0.0000000000000001,
                0.00000000000000001,
                0.000000000000000001,
                0.0000000000000000001,
                0.00000000000000000001,
                0.000000000000000000001,
                0.0000000000000000000001,
        ];

        /***********************************************************************

                Construct a static exception instance.

        ***********************************************************************/

        static this()
        {
                Error = new TokenException ("unexpected numeric format");
        }

        /***********************************************************************

                Set the content of this token. The array is referenced,
                not copied. Returns this token, to permit call chaining.

        ***********************************************************************/

        Token set (char[] content)
        {
                this.content = content;
                return this;
        }

        /***********************************************************************

                Return the length of this token.

        ***********************************************************************/

        int getLength ()
        {
                return content.length;
        }

        /***********************************************************************

                Set the type of this token. Token types can be useful when
                one wishes to categorize input patterns. Returns this
                token, to permit call chaining.

        ***********************************************************************/

        Token setType (int type)
        {
                this.type = type;
                return this;
        }

        /***********************************************************************

                Return the type associated with this token. See setType().

        ***********************************************************************/

        int getType ()
        {
                return type;
        }

        /***********************************************************************

                Convert this token to an integer.

                Throws the shared TokenException when the content holds
                anything other than optional leading whitespace, an
                optional sign, and decimal digits. No overflow check is
                performed on the accumulated value.

        ***********************************************************************/

        int toInt ()
        {
                int i = 0;

                void multiply (int digit)
                {
                        i = i * 10 + digit;
                }

                return parseDigits (&multiply, null) ? -i : i;
        }

        /***********************************************************************

                Convert this token to an unsigned integer.

                Throws the shared TokenException on malformed content,
                and also when the content carries a '-' prefix. No
                overflow check is performed on the accumulated value.

        ***********************************************************************/

        uint toUInt ()
        {
                uint i = 0;

                void multiply (int digit)
                {
                        i = i * 10 + digit;
                }

                // a negative sign is meaningless for an unsigned result
                if (parseDigits (&multiply, null))
                    throw Error;
                return i;
        }

        /***********************************************************************

                Convert this token to a long integer.

                Throws the shared TokenException when the content holds
                anything other than optional leading whitespace, an
                optional sign, and decimal digits. No overflow check is
                performed on the accumulated value.

        ***********************************************************************/

        long toLong ()
        {
                long l = 0;

                void multiply (int digit)
                {
                        l = l * 10 + digit;
                }

                return parseDigits (&multiply, null) ? -l : l;
        }

        /***********************************************************************

                Convert this token to an unsigned long integer.

                Throws the shared TokenException on malformed content,
                and also when the content carries a '-' prefix. No
                overflow check is performed on the accumulated value.

        ***********************************************************************/

        ulong toULong ()
        {
                ulong l = 0;

                void multiply (int digit)
                {
                        l = l * 10 + digit;
                }

                // a negative sign is meaningless for an unsigned result
                if (parseDigits (&multiply, null))
                    throw Error;
                return l;
        }

        /***********************************************************************

                Convert this token to a real.

                The whole and fractional digit runs are accumulated
                separately; the fraction is then scaled via the divisors
                table and added to the whole part. Exponents are not
                supported.

                Throws the shared TokenException on malformed content, or
                when there are more fraction digits than the divisors
                table has entries.

        ***********************************************************************/

        real toReal ()
        {
                real r = 0,
                     f = 0;
                int  divisor = 0;

                void whole (int c)
                {
                        r = r * 10.0 + c;
                }

                void fraction (int c)
                {
                        f = f * 10.0 + c;
                        ++divisor;
                }

                bool neg = parseDigits (&whole, &fraction);

                if (divisor)
                   {
                   // 'divisor' counts fraction digits and indexes the table
                   // at [divisor-1], so a count equal to the table length
                   // is still in range (the prior '<' rejected it)
                   if (divisor <= divisors.length)
                       r += f * divisors [divisor-1];
                   else
                      throw Error;
                   }

                return neg ? -r : r;
        }

        /***********************************************************************

                Clone this token, making a copy of the content also.

        ***********************************************************************/

        Token clone ()
        {
                Token copy = new Token;

                // toString(false) duplicates the content rather than slicing
                copy.set (toString (false));
                copy.type = type;
                return copy;
        }

        /***********************************************************************

                Return a reference to this token's content. Duplicate it
                only if 'slice' is explicitly set to false (defaults to
                a slice instead).

        ***********************************************************************/

        char[] toString (bool slice = true)
        {
                if (slice)
                    return content;
                return content.dup;
        }

        /***********************************************************************

                Is this token equal to another? Two tokens are compared
                by content only; the token type is not considered. A
                non-Token argument is deferred to the superclass.

        ***********************************************************************/

        override int opEquals (Object o)
        {
                Token other = cast(Token) o;

                if (other is null)
                    return super.opEquals (o);
                return typeid(char[]).equals (&content, &other.content);
        }

        /***********************************************************************

                Compare this token to another. Two tokens are ordered by
                content only; a non-Token argument is deferred to the
                superclass.

        ***********************************************************************/

        override int opCmp (Object o)
        {
                Token other = cast(Token) o;

                if (other is null)
                    return super.opCmp (o);

                return typeid(char[]).compare (&content, &other.content);
        }

        /***********************************************************************

                Hash this token, based upon its content.

        ***********************************************************************/

        override uint toHash ()
        {
                return typeid(char[]).getHash (&content);
        }

        /***********************************************************************

                Make the Token class compatible with IWriter instances.

        ***********************************************************************/

        void write (IWriter w)
        {
                w.put (content);
        }

        /***********************************************************************

                Parse a set of digits, optionally prefixed by a '+' or '-',
                and optionally with a '.' contained within. Delegates come
                in quite handy here, and add relatively little overhead.

                Each digit of the whole part is handed to 'dg'. When
                'fraction' is provided, the first '.' redirects all
                subsequent digits to it instead; when 'fraction' is
                null, a '.' is treated as malformed input.

                Only whitespace ahead of the number is skipped. Any
                other character which is not a digit (including a second
                '.', or whitespace after the number has begun) causes
                the shared TokenException to be thrown. Content which is
                empty, or all whitespace, invokes neither delegate.

                Returns true if the number carried a '-' prefix.

        ***********************************************************************/

        private bool parseDigits (Accumulator dg, Accumulator fraction)
        {
                bool  neg = false;
                State state = State.Begin;

                foreach (char c; content)
                        {
                        if (state == State.Begin)
                           {
                           // still ahead of the number: skip whitespace
                           if (isspace (c))
                               continue;

                           // first significant char; it may be a sign
                           state = State.Number;
                           if (c == '-')
                              {
                              neg = true;
                              continue;
                              }
                           if (c == '+')
                               continue;
                           }

                        // a '.' is accepted once only, while within the
                        // whole part, and only where the caller wants one
                        if (state == State.Number && c == '.' && fraction)
                           {
                           state = State.Fraction;
                           dg = fraction;
                           continue;
                           }

                        if (isdigit (c))
                            dg (c - '0');
                        else
                           throw Error;
                        }
                return neg;
        }
}