/*******************************************************************************

        @file Token.d
        
        Copyright (c) 2004 Kris Bell
        
        This software is provided 'as-is', without any express or implied
        warranty. In no event will the authors be held liable for damages
        of any kind arising from the use of this software.
        
        Permission is hereby granted to anyone to use this software for any 
        purpose, including commercial applications, and to alter it and/or 
        redistribute it freely, subject to the following restrictions:
        
        1. The origin of this software must not be misrepresented; you must 
           not claim that you wrote the original software. If you use this 
           software in a product, an acknowledgment within documentation of 
           said product would be appreciated but is not required.

        2. Altered source versions must be plainly marked as such, and must 
           not be misrepresented as being the original software.

        3. This notice may not be removed or altered from any distribution
           of the source.

        4. Derivative works are permitted, but they must carry this notice
           in full and credit the original source.


                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


        @version        Initial version, March 2004
                        Circular dependency split; Oct 2004
                        2nd circular dependency split; March 2005 (dmd v0.115)

        @author         Kris, Chris Sauls


*******************************************************************************/

module mango.io.Token;

private import  mango.format.Int,
                mango.format.Long,
                mango.format.Double;

private import  mango.io.Tokenizer;

private import  mango.io.model.IWriter,
                mango.io.model.IReader,
                mango.io.model.IConduit;

/*******************************************************************************

        Tokens used by Tokenizer class. Tokens do not copy their content
        so they are quite useful for parsing quantities of data quickly.
        Conversely since each token is mapped into an external buffer,
        you should be aware that changes to said buffer will impact any
        tokens based upon it. You may sidestep this by using the clone()
        method, or toString().dup 

        Tokens can convert from a variety of numeric formats to ascii text.
        Formats currently include int, uint, long, ulong, and real. Each
        number may be preceded by whitespace, and an optional '+' or '-'
        specifier. Note that real-number format is simplistic in that it
        does not support exponential declarations. Note the conversion
        methods should probably be moved elsewhere.

        Here's a brief example of how to apply Token with Tokenizers:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a buffer for reading the file
        IBuffer buffer = fc.createBuffer;

        // create a token for receiving the line
        Token token = new Token;

        // read file a line at a time. Method next() returns false when no
        // more delimiters are found. Note there may be an unterminated line
        // at eof
        while (Tokenizers.line.next(buffer, token) || token.getLength)
               Stdout (token) (CR);
        @endcode

        See also BoundToken, ReaderToken, CompositeToken and HybridToken.

*******************************************************************************/

class Token : IWritable
{
        // optional user-assigned category for this token
        private int    type;

        // slice of the external buffer holding the token text; NOT a copy
        private char[] content;

        /***********************************************************************
        
                Set the content of this token. The given array is sliced,
                not copied, so its lifetime must exceed that of the token.

        ***********************************************************************/

        Token set (char[] content)
        {
                this.content = content;
                return this;
        }

        /***********************************************************************
        
                Return the length (in chars) of this token.

        ***********************************************************************/

        int getLength ()
        {
                return content.length;
        }

        /***********************************************************************
        
                Set the type of this token. Token types can be useful when
                one wishes to categorize input patterns.

        ***********************************************************************/

        Token setType (int type)
        {
                this.type = type;
                return this;
        }

        /***********************************************************************
        
                Return the type associated with this token. See setType().

        ***********************************************************************/

        int getType ()
        {
                return type;
        }

        /***********************************************************************
        
                Convert this token to an integer.

        ***********************************************************************/

        int toInt ()
        {
                return Int.parse (content);
        }

        /***********************************************************************
        
                Convert this token to a long integer.

        ***********************************************************************/

        long toLong ()
        {
                return Long.parse (content);
        }

        /***********************************************************************
        
                Convert this token to a real.

        ***********************************************************************/

        real toReal ()
        {
                return Double.parse (content);
        }

        /***********************************************************************
        
                Clone this token, making a copy of the content also.

        ***********************************************************************/

        Token clone ()
        {
                // note: toString(false) duplicates the content, so the
                // copy is decoupled from the original backing buffer
                Token copy = new Token;

                copy.set (toString (false));
                copy.type = type;
                return copy;
        }

        /***********************************************************************
        
                Return a reference to this token's content. Duplicate it
                only if 'slice' is explicitly set to false (defaults to
                a slice instead).

        ***********************************************************************/

        char[] toString (bool slice = true)
        {
                if (slice)
                    return content;
                return content.dup;
        }

        /***********************************************************************
        
                Is this token equal to another? Compares content (not type)
                when the other object is a Token; defers to Object otherwise.

        ***********************************************************************/

        override int opEquals (Object o)
        {
                Token other = cast(Token) o;

                if (other is null)
                    return super.opEquals (o);
                return typeid(char[]).equals (&content, &other.content);
        }

        /***********************************************************************
        
                Compare this token to another, by content. Defers to
                Object.opCmp when the other object is not a Token.

        ***********************************************************************/

        override int opCmp (Object o)
        {
                Token other = cast(Token) o;

                if (other is null)
                    return super.opCmp (o);

                return typeid(char[]).compare (&content, &other.content);
        }

        /***********************************************************************
        
                Hash this token, via its content.

        ***********************************************************************/

        override uint toHash ()
        {
                return typeid(char[]).getHash (&content);
        }

        /***********************************************************************
        
                Make the Token class compatible with IWriter instances.

        ***********************************************************************/

        void write (IWriter w)
        {
                w.put (content);
        }
}


/*******************************************************************************

        A style of Token that's bound to a Tokenizer. This can be a handy
        means of cleaning up client code, and limiting the scope of how
        a token is used by receiving methods.

        Contrast this example with that shown in the Token class:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a buffer for reading the file
        IBuffer buffer = fc.createBuffer;

        // bind a line-tokenizer to our input token
        BoundToken line = new BoundToken (Tokenizers.line);

        // read file a line at a time. Method next() returns false when no
        // more delimiters are found. Note there may be an unterminated line
        // at eof
        while (line.next(buffer) || line.getLength)
               Stdout (line) (CR);
        @endcode

        One might also consider a CompositeToken or HybridToken.

*******************************************************************************/

class BoundToken : Token
{
        // the tokenizer this token is permanently bound to
        private ITokenizer tk;

        /***********************************************************************
        
                Bind this token to the given tokenizer.

        ***********************************************************************/

        this (ITokenizer tk)
        {
                this.tk = tk;
        }

        /***********************************************************************
        
                Return the associated tokenizer.

        ***********************************************************************/

        ITokenizer getTokenizer ()
        {
                return tk;
        }

        /***********************************************************************
        
                Extract the next token from the provided buffer.

                Returns true if a token was isolated, false if no more
                tokens were found. Note that one last token may still
                be present when this returns false; this may happen if
                (for example) the last delimiter is missing before an
                EOF condition is seen. Check token.getLength() when
                this method returns false.

                For example:

                @code
                while (token.next() || token.getLength())
                       // do something
                
                @endcode

        ***********************************************************************/

        bool next (IBuffer buf)
        {
                return tk.next (buf, this);
        }
}


/*******************************************************************************

        ReaderToken adapts a BoundToken such that it can be used directly
        with any IReader implementation. We just add the IReadable methods
        to the basic BoundToken.

        Here's a contrived example of how to use ReaderToken:

        @code
        // create a small buffer on the heap
        Buffer buf = new Buffer (256);

        // write items with a comma between each
        TextWriter write = new TextWriter (buf, ",");

        // write some stuff to the buffer
        write ("now is the time for all good men") (3.14159);

        // bind a couple of tokens to a comma tokenizer
        ReaderToken text = new ReaderToken (Tokenizers.comma);
        ReaderToken number = new ReaderToken (Tokenizers.comma);

        // create any old reader since we only use it for handling tokens
        Reader read = new Reader (buf);

        // populate both tokens via reader
        read (text) (number);

        // print them to the console
        Stdout (text) (':') (number) (CR);
        @endcode

*******************************************************************************/

class ReaderToken : BoundToken, IReadable
{
        /***********************************************************************
        
                Construct a ReaderToken using the provided Tokenizer.

        ***********************************************************************/

        this (ITokenizer tk)
        {
                super (tk);
        }

        /***********************************************************************
        
                Read the next delimited element into this token.

        ***********************************************************************/

        void read (IReader r)
        {
                tk.next (r.getBuffer, this);
        }
}


/*******************************************************************************

        Another subclass of BoundToken that combines both a Tokenizer and
        an input buffer. This is simply a convenience wrapper that takes
        care of details that would otherwise clutter the client code.

        Compare this to usage of a basic Token:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a Token and bind it to both the file and a line-tokenizer
        CompositeToken line = new CompositeToken (Tokenizers.line, fc);

        // read file a line at a time. Method get() returns false when no
        // more tokens are found.
        while (line.get)
               Stdout (line) (CR);
        @endcode

        You might also consider a HybridToken for further processing of
        token content.

*******************************************************************************/

class CompositeToken : BoundToken
{
        // the input buffer this token extracts from
        private IBuffer buffer;

        /***********************************************************************
        
                Set this token to use the provided Tokenizer, and bind it
                to the given buffer.

        ***********************************************************************/

        this (ITokenizer tk, IBuffer buffer)
        {
                super (tk);
                this.buffer = buffer;
        }

        /***********************************************************************
        
                Set this token to use the provided Tokenizer, and bind it
                to the buffer associated with the given conduit.

        ***********************************************************************/

        this (ITokenizer tk, IConduit conduit)
        {
                this (tk, conduit.createBuffer);
        }

        /***********************************************************************
        
                Return the associated buffer.

        ***********************************************************************/

        IBuffer getBuffer ()
        {
                return buffer;
        }

        /***********************************************************************
        
                Extract the next token.

                Returns true if a token was isolated, false if no more
                tokens were found. Note that one last token may still
                be present when this returns false; this may happen if
                (for example) the last delimiter is missing before an
                Eof condition is seen. Check token.getLength() when
                this method returns false.

                For example:

                @code
                while (token.next || token.getLength)
                       // do something
                
                @endcode

        ***********************************************************************/

        bool next ()
        {
                return tk.next (buffer, this);
        }

        /***********************************************************************
        
                Extract the next token, taking Eof into consideration.
                If next() returns false, then this function will still
                return true as long as there's some content available.

                For example:

                @code
                while (token.get)
                       // do something
                
                @endcode

        ***********************************************************************/

        bool get ()
        {
                return next || getLength;
        }
}


/*******************************************************************************

        A subclass of CompositeToken that combines a Tokenizer, an input
        buffer, and the means to bind its content to a subordinate Reader
        or Token. This is another convenience wrapper that takes care of
        details that would otherwise complicate client code.

        Compare this to usage of a CompositeToken:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a Token and bind it to both the file and a line-tokenizer
        HybridToken line = new HybridToken (Tokenizers.line, fc);

        // now create a reader upon the token
        Reader input = new Reader (line.getHost);

        // read file a line at a time. Method get() returns false when no
        // more tokens are found.
        while (line.get)
              {
              int x, y;

              // reader is now bound to the content of the current line
              input (x) (y);

              Stdout (x) (y) (CR);
              }
        @endcode

        You can use the same mechanism to bind subordinate Tokens:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a Token and bind it to both the file and a line-tokenizer
        HybridToken line = new HybridToken (Tokenizers.line, fc);

        // now create a subordinate Token that splits on whitespace
        CompositeToken word = new CompositeToken (Tokenizers.space, line.getHost);

        // read file a line at a time. Method get() returns false when no
        // more tokens are found.
        while (line.get)
               // extract space delimited tokens from each line
               while (word.get)
                      Stdout (word) (CR);
        @endcode


*******************************************************************************/

class HybridToken : CompositeToken
{
        // hosting buffer, refreshed with this token's content on each next()
        private IBuffer host;

        /***********************************************************************
        
                Set this token to use the provided Tokenizer, and bind it
                to the given buffer.

        ***********************************************************************/

        this (ITokenizer tk, IBuffer buffer)
        {
                super (tk, buffer);

                // create the hosting IBuffer
                host = buffer.create;
        }

        /***********************************************************************
        
                Set this token to use the provided Tokenizer, and bind it
                to the buffer associated with the given conduit.

        ***********************************************************************/

        this (ITokenizer tk, IConduit conduit)
        {
                this (tk, conduit.createBuffer);
        }

        /***********************************************************************
        
                Return the associated host buffer. The host should be used
                for purposes of binding a subordinate Token or Reader onto
                the content of this token. Each call to next() will update
                this content appropriately, which is also reflected within
                said host buffer.

                That is, token.toString == token.getHost.toString.

        ***********************************************************************/

        IBuffer getHost ()
        {
                return host;
        }

        /***********************************************************************
        
                Extract the next token.

                Returns true if a token was isolated, false if no more
                tokens were found. Note that one last token may still
                be present when this returns false; this may happen if
                (for example) the last delimiter is missing before an
                Eof condition is seen. Check token.getLength() when
                this method returns false.

                For example:

                @code
                while (token.next || token.getLength)
                       // do something
                
                @endcode

        ***********************************************************************/

        bool next ()
        {
                // get the next token
                bool ret = super.next;

                // mirror the new token content into the host buffer, so
                // any subordinate Reader/Token sees the current content
                host.setValidContent (toString);

                return ret;
        }
}