/*******************************************************************************

        @file Token.d

        Copyright (c) 2004 Kris Bell

        This software is provided 'as-is', without any express or implied
        warranty. In no event will the authors be held liable for damages
        of any kind arising from the use of this software.

        Permission is hereby granted to anyone to use this software for any
        purpose, including commercial applications, and to alter it and/or
        redistribute it freely, subject to the following restrictions:

        1. The origin of this software must not be misrepresented; you must
           not claim that you wrote the original software. If you use this
           software in a product, an acknowledgment within documentation of
           said product would be appreciated but is not required.

        2. Altered source versions must be plainly marked as such, and must
           not be misrepresented as being the original software.

        3. This notice may not be removed or altered from any distribution
           of the source.

        4. Derivative works are permitted, but they must carry this notice
           in full and credit the original source.


                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


        @version        Initial version, March 2004
                        Circular dependency split; Oct 2004
                        2nd circular dependency split; March 2005 (dmd v0.115)
                        Altered from the original: clone/opEquals/opCmp made
                        generic over T; HybridToken host now receives the
                        token content rather than Object.toString

        @author         Kris, Chris Sauls


*******************************************************************************/

module mango.io.Token;

private import  mango.io.Buffer,
                mango.io.Tokenizer;

private import  mango.convert.Atoi,
                mango.convert.Double;

private import  mango.io.model.IWriter,
                mango.io.model.IReader,
                mango.io.model.IConduit;

/*******************************************************************************

        Tokens used by Tokenizer class. Tokens do not copy their content
        so they are quite useful for parsing quantities of data quickly.
        Conversely since each token is mapped into an external buffer,
        you should be aware that changes to said buffer will impact any
        tokens based upon it. You may sidestep this by using the clone()
        method, or contents(false), both of which duplicate the content.

        Tokens can convert their textual content into numeric values via
        toInt(), toLong() and toReal(). The parsing itself is delegated
        to mango.convert.Atoi and mango.convert.Double. Each number may
        be preceded by whitespace, and an optional '+' or '-' specifier.
        Note that real-number format is simplistic in that it does not
        support exponential declarations. Note the conversion methods
        should probably be moved elsewhere.

        Here's a brief example of how to apply Token with Tokenizers:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a buffer for reading the file
        IBuffer buffer = new Buffer (fc);

        // create a token for receiving the line
        Token token = new Token;

        // read file a line at a time. Method next() returns false when no more
        // delimiters are found. Note there may be an unterminated line at eof
        while (Tokenizers.line.next(buffer, token) || token.getLength)
               Stdout (token) (CR);
        @endcode

        See also BoundToken, ReaderToken, CompositeToken and HybridToken.

*******************************************************************************/

class TokenTemplate(T) : IWritable
{
        private int     type;           // caller-defined category, see setType()
        private T[]     content;        // slice of external data; not owned

        /***********************************************************************

                Set the content of this token. The array is referenced,
                not copied.

                Returns this token, to allow call chaining.

        ***********************************************************************/

        TokenTemplate set (T[] content)
        {
                this.content = content;
                return this;
        }

        /***********************************************************************

                Return the length of this token, as a count of T elements.

        ***********************************************************************/

        int getLength ()
        {
                return content.length;
        }

        /***********************************************************************

                Set the type of this token. Token types can be useful when
                one wishes to categorize input patterns.

                Returns this token, to allow call chaining.

        ***********************************************************************/

        TokenTemplate setType (int type)
        {
                this.type = type;
                return this;
        }

        /***********************************************************************

                Return the type associated with this token. See setType().

        ***********************************************************************/

        int getType ()
        {
                return type;
        }

        /***********************************************************************

                Convert this token to an integer, using AtoiTemplate.

        ***********************************************************************/

        int toInt ()
        {
                return AtoiTemplate!(T).parse (content);
        }

        /***********************************************************************

                Convert this token to a long integer, using AtoiTemplate.

        ***********************************************************************/

        long toLong ()
        {
                return AtoiTemplate!(T).parse (content);
        }

        /***********************************************************************

                Convert this token to a real, using DoubleTemplate.

        ***********************************************************************/

        real toReal ()
        {
                return DoubleTemplate!(T).parse (content);
        }

        /***********************************************************************

                Clone this token, making a copy of the content also.

                The result is always a plain TokenTemplate of the same
                element type T carrying a duplicate of the content and
                the same type tag; any tokenizer or buffer binding held
                by a subclass is not carried over.

        ***********************************************************************/

        TokenTemplate clone ()
        {
                // use the current instantiation rather than the char-only
                // Token alias, so that this compiles for every T
                TokenTemplate copy = new TokenTemplate;

                copy.set (contents (false));
                copy.type = type;
                return copy;
        }

        /***********************************************************************

                Return a reference to this tokens content. Duplicate it
                only if 'slice' is explicitly set to false (defaults to
                a slice instead).

        ***********************************************************************/

        T[] contents (bool slice = true)
        {
                if (slice)
                    return content;
                return content.dup;
        }

        /***********************************************************************

                Is this token equal to another?

                When 'o' is a token of the same element type, the two
                contents are compared via the TypeInfo for T[]. For any
                other object the decision is left to Object.opEquals.

        ***********************************************************************/

        override int opEquals (Object o)
        {
                // cast to the current instantiation, not the char alias
                TokenTemplate other = cast(TokenTemplate) o;

                if (other is null)
                    return super.opEquals (o);
                return typeid(T[]).equals (&content, &other.content);
        }

        /***********************************************************************

                Compare this token to another.

                When 'o' is a token of the same element type, the two
                contents are ordered via the TypeInfo for T[]. For any
                other object the decision is left to Object.opCmp.

        ***********************************************************************/

        override int opCmp (Object o)
        {
                // cast to the current instantiation, not the char alias
                TokenTemplate other = cast(TokenTemplate) o;

                if (other is null)
                    return super.opCmp (o);

                return typeid(T[]).compare (&content, &other.content);
        }

        /***********************************************************************

                Hash this token, based upon its content.

        ***********************************************************************/

        override uint toHash ()
        {
                return typeid(T[]).getHash (&content);
        }

        /***********************************************************************

                Make the Token class compatible with IWriter instances,
                by emitting the token content to the given writer.

        ***********************************************************************/

        void write (IWriter w)
        {
                w.put (content);
        }
}

alias TokenTemplate!(char) Token;


/*******************************************************************************

        A style of Token that's bound to a Tokenizer. This can be a handy
        means of cleaning up client code, and limiting the scope of how
        a token is used by receiving methods.

        Contrast this example with that shown in the Token class:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a buffer for reading the file
        IBuffer buffer = new Buffer(fc);

        // bind a line-tokenizer to our input token
        BoundToken line = new BoundToken (Tokenizers.line);

        // read file a line at a time. Method next() returns false when no more
        // delimiters are found. Note there may be an unterminated line at eof
        while (line.next(buffer) || line.getLength)
               Stdout (line) (CR);
        @endcode

        One might also consider a CompositeToken or HybridToken.

*******************************************************************************/

class BoundTokenTemplate(T) : TokenTemplate!(T)
{
        private ITokenizer tk;          // tokenizer used to populate this token

        /***********************************************************************

                Construct a token bound to the provided Tokenizer.

        ***********************************************************************/

        this (ITokenizer tk)
        {
                this.tk = tk;
        }

        /***********************************************************************

                Return the associated tokenizer

        ***********************************************************************/

        ITokenizer getTokenizer ()
        {
                return tk;
        }

        /***********************************************************************

                Extract the next token from the provided buffer.

                Returns true if a token was isolated, false if no more
                tokens were found. Note that one last token may still
                be present when this returns false; this may happen if
                (for example) the last delimiter is missing before an
                EOF condition is seen. Check token.getLength() when
                this method returns false.

                For example:

                @code
                while (token.next(buffer) || token.getLength())
                       // do something

                @endcode

        ***********************************************************************/

        bool next (IBuffer buf)
        {
                return tk.next (buf, this);
        }
}

alias BoundTokenTemplate!(char) BoundToken;


/*******************************************************************************

        ReaderToken adapts a BoundToken such that it can be used directly
        with any IReader implementation. We just add the IReadable methods
        to the basic BoundToken.

        Here's a contrived example of how to use ReaderToken:

        @code
        // create a small buffer on the heap
        Buffer buf = new Buffer (256);

        // write items with a comma between each
        TextWriter write = new TextWriter (buf, ",");

        // write some stuff to the buffer
        write ("now is the time for all good men") (3.14159);

        // bind a couple of tokens to a comma tokenizer
        ReaderToken text = new ReaderToken (Tokenizers.comma);
        ReaderToken number = new ReaderToken (Tokenizers.comma);

        // create any old reader since we only use it for handling tokens
        Reader read = new Reader (buf);

        // populate both tokens via reader
        read (text) (number);

        // print them to the console
        Stdout (text) (':') (number) (CR);
        @endcode

*******************************************************************************/

class ReaderTokenTemplate(T) : BoundTokenTemplate!(T), IReadable
{
        /***********************************************************************

                Construct a ReaderToken using the provided Tokenizer.

        ***********************************************************************/

        this (ITokenizer tk)
        {
                super (tk);
        }

        /***********************************************************************

                Read the next delimited element into this token, taking
                input from the buffer of the given reader. The result of
                the tokenizer (whether a delimiter was found) is ignored;
                inspect getLength() if that distinction matters.

        ***********************************************************************/

        void read (IReader r)
        {
                tk.next (r.getBuffer, this);
        }
}

alias ReaderTokenTemplate!(char) ReaderToken;


/*******************************************************************************

        Another subclass of BoundToken that combines both a Tokenizer and
        an input buffer. This is simply a convenience wrapper that takes
        care of details that would otherwise clutter the client code.

        Compare this to usage of a basic Token:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a Token and bind it to both the file and a line-tokenizer
        CompositeToken line = new CompositeToken (Tokenizers.line, fc);

        // read file a line at a time. Method get() returns false when no more
        // tokens are found.
        while (line.get)
               Stdout (line) (CR);
        @endcode

        You might also consider a HybridToken for further processing of
        token content.

*******************************************************************************/

class CompositeTokenTemplate(T) : BoundTokenTemplate!(T)
{
        private IBuffer buffer;         // input source for next() and get()

        /***********************************************************************

                Set this token to use the provided Tokenizer, and bind it
                to the given buffer.

        ***********************************************************************/

        this (ITokenizer tk, IBuffer buffer)
        {
                super (tk);
                this.buffer = buffer;
        }

        /***********************************************************************

                Set this token to use the provided Tokenizer, and bind it
                to a new Buffer constructed upon the given conduit.

        ***********************************************************************/

        this (ITokenizer tk, IConduit conduit)
        {
                this (tk, new Buffer(conduit));
        }

        /***********************************************************************

                Return the associated buffer

        ***********************************************************************/

        IBuffer getBuffer ()
        {
                return buffer;
        }

        /***********************************************************************

                Extract the next token.

                Returns true if a token was isolated, false if no more
                tokens were found. Note that one last token may still
                be present when this returns false; this may happen if
                (for example) the last delimiter is missing before an
                Eof condition is seen. Check token.getLength() when
                this method returns false.

                For example:

                @code
                while (token.next || token.getLength)
                       // do something

                @endcode

        ***********************************************************************/

        bool next ()
        {
                return tk.next (buffer, this);
        }

        /***********************************************************************

                Extract the next token, taking Eof into consideration.
                If next() returns false, then this function will still
                return true as long as there's some content available.

                For example:

                @code
                while (token.get)
                       // do something

                @endcode

        ***********************************************************************/

        bool get ()
        {
                // next() is virtual, so subclasses overriding it (such
                // as HybridToken) are honoured here
                return next() || getLength();
        }
}

alias CompositeTokenTemplate!(char) CompositeToken;


/*******************************************************************************

        A subclass of CompositeToken that combines a Tokenizer, an input buffer,
        and the means to bind its content to a subordinate Reader or Token.
        This is another convenience wrapper that takes care of details that
        would otherwise complicate client code.

        Compare this to usage of a CompositeToken:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a Token and bind it to both the file and a line-tokenizer
        HybridToken line = new HybridToken (Tokenizers.line, fc);

        // now create a reader upon the token
        Reader input = new Reader (line.getHost);

        // read file a line at a time. Method get() returns false when no more
        // tokens are found.
        while (line.get)
              {
              int x, y;

              // reader is now bound to the content of the current line
              input (x) (y);

              Stdout (x) (y) (CR);
              }
        @endcode

        You can use the same mechanism to bind subordinate Tokens:

        @code
        // open a file for reading
        FileConduit fc = new FileConduit ("test.txt");

        // create a Token and bind it to both the file and a line-tokenizer
        HybridToken line = new HybridToken (Tokenizers.line, fc);

        // now create a subordinate Token that splits on whitespace
        CompositeToken word = new CompositeToken (Tokenizers.space, line.getHost);

        // read file a line at a time. Method get() returns false when no more
        // tokens are found.
        while (line.get)
               // extract space delimited tokens from each line
               while (word.get)
                      Stdout (word) (CR);
        @endcode


*******************************************************************************/

class HybridTokenTemplate(T) : CompositeTokenTemplate!(T)
{
        private IBuffer host;           // exposes the current token content

        /***********************************************************************

                Set this token to use the provided Tokenizer, and bind it
                to the given buffer. A separate, initially empty, host
                buffer is created for exposing the token content.

        ***********************************************************************/

        this (ITokenizer tk, IBuffer buffer)
        {
                super (tk, buffer);

                // create the hosting IBuffer
                host = new Buffer();
        }

        /***********************************************************************

                Set this token to use the provided Tokenizer, and bind it
                to a new Buffer constructed upon the given conduit.

        ***********************************************************************/

        this (ITokenizer tk, IConduit conduit)
        {
                this (tk, new Buffer(conduit));
        }

        /***********************************************************************

                Return the associated host buffer. The host should be used
                for purposes of binding a subordinate Token or Reader onto
                the content of this token. Each call to next() will update
                this content appropriately, which is also reflected within
                said host buffer.

                That is, after each next() the host is loaded with the
                same slice that token.contents returns.

        ***********************************************************************/

        IBuffer getHost ()
        {
                return host;
        }

        /***********************************************************************

                Extract the next token, and make its content available
                through the host buffer.

                Returns true if a token was isolated, false if no more
                tokens were found. Note that one last token may still
                be present when this returns false; this may happen if
                (for example) the last delimiter is missing before an
                Eof condition is seen. Check token.getLength() when
                this method returns false.

                For example:

                @code
                while (token.next || token.getLength)
                       // do something

                @endcode

        ***********************************************************************/

        bool next ()
        {
                // get the next token
                bool found = super.next();

                // set host content. This must be the token content itself:
                // no class here overrides toString, so calling it would
                // hand Object.toString (the class name) to the host
                host.setValidContent (contents());

                return found;
        }
}

alias HybridTokenTemplate!(char) HybridToken;