
Token.d

00001 /*******************************************************************************
00002 
00003         @file Token.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032 
00033         @version        Initial version, March 2004      
00034                         Circular dependency split; Oct 2004  
00035                         2nd circular dependency split; March 2005 (dmd v0.115)
00036 
00037         @author         Kris, Chris Sauls
00038 
00039 
00040 *******************************************************************************/
00041 
00042 module mango.io.Token;
00043 
00044 private import  mango.format.Int,
00045                 mango.format.Long,
00046                 mango.format.Double;
00047 
00048 private import  mango.io.Tokenizer;
00049 
00050 private import  mango.io.model.IWriter,
00051                 mango.io.model.IReader,
00052                 mango.io.model.IConduit;
00053 
00054 /*******************************************************************************
00055 
00056         Tokens used by the Tokenizer class. Tokens do not copy their content,
00057         so they are quite useful for parsing quantities of data quickly.
00058         Conversely, since each token is mapped into an external buffer,
00059         you should be aware that changes to said buffer will impact any
00060         tokens based upon it. You may sidestep this by using the clone()
00061         method, or toString().dup.
00062 
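        Here is a minimal sketch of that aliasing behaviour (the array
        content shown is purely illustrative):

        @code
        Token token = new Token;

        // the token merely references this array; nothing is copied
        char[] data = "some external content".dup;
        token.set (data);

        // take private copies before 'data' is overwritten elsewhere
        Token  copy = token.clone;
        char[] text = token.toString (false);
        @endcode
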
00063         Tokens can convert their ascii content to a variety of numeric
00064         formats. Formats currently include int, uint, long, ulong, and real.
00065         Each number may be preceded by whitespace and an optional '+' or '-'
00066         sign. Note that the real-number format is simplistic in that it
00067         does not support exponential notation. Note that the conversion
00068         methods should probably be moved elsewhere.
00069 
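        For instance (a small sketch; the literal values below are
        illustrative only):

        @code
        Token t = new Token;

        int  i = t.set (" 123").toInt;     // leading whitespace is skipped
        long l = t.set ("-456").toLong;    // optional sign
        real r = t.set ("3.14").toReal;    // no exponent support
        @endcode
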
00070         Here's a brief example of how to apply Token with Tokenizers:
00071 
00072         @code
00073         // open a file for reading
00074         FileConduit fc = new FileConduit ("test.txt");
00075 
00076         // create a buffer for reading the file
00077         IBuffer buffer = fc.createBuffer;
00078 
00079         // create a token for receiving the line
00080         Token token = new Token;
00081 
00082         // read file a line at a time. Method next() returns false when no more 
00083         // delimiters are found. Note there may be an unterminated line at eof
00084         while (Tokenizers.line.next(buffer, token) || token.getLength)
00085                Stdout (token) (CR);
00086         @endcode
00087 
00088         See also BoundToken, ReaderToken, CompositeToken and HybridToken.
00089 
00090 *******************************************************************************/
00091 
00092 class Token : IWritable
00093 { 
00094         private int     type;
00095         private char[]  content;
00096 
00097         /***********************************************************************
00098         
00099                 Set the content of this token.
00100 
00101         ***********************************************************************/
00102 
00103         Token set (char[] content)
00104         {
00105                 this.content = content;
00106                 return this;
00107         }
00108 
00109         /***********************************************************************
00110                 
00111                 Return the length of this token.
00112 
00113         ***********************************************************************/
00114 
00115         int getLength ()
00116         {
00117                 return content.length;
00118         }
00119 
00120         /***********************************************************************
00121         
00122                 Set the type of this token. Token types can be useful when
00123                 one wishes to categorize input patterns.
00124 
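                For example, a lexer might tag each token with an
                application-defined code (the token instance and the
                categories below are hypothetical):

                @code
                // hypothetical application-specific categories
                enum {Word, Number, Comment}

                token.setType (Word);
                assert (token.getType == Word);
                @endcode
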
00125         ***********************************************************************/
00126 
00127         Token setType (int type)
00128         {
00129                 this.type = type;
00130                 return this;
00131         }
00132 
00133         /***********************************************************************
00134         
00135                 Return the type associated with this token. See setType().
00136 
00137         ***********************************************************************/
00138 
00139         int getType ()
00140         {
00141                 return type;
00142         }
00143 
00144         /***********************************************************************
00145         
00146                 Convert this token to an integer.
00147 
00148         ***********************************************************************/
00149 
00150         int toInt ()
00151         {
00152                 return Int.parse (content);
00153         }
00154 
00155         /***********************************************************************
00156         
00157                 Convert this token to a long integer.
00158 
00159         ***********************************************************************/
00160 
00161         long toLong ()
00162         {
00163                 return Long.parse (content);
00164         }
00165 
00166         /***********************************************************************
00167 
00168                 Convert this token to a real.
00169 
00170         ***********************************************************************/
00171 
00172         real toReal ()
00173         {
00174                 return Double.parse (content);
00175         }
00176 
00177         /***********************************************************************
00178         
00179                 Clone this token, making a copy of the content also.
00180 
00181         ***********************************************************************/
00182 
00183         Token clone ()
00184         {
00185                 Token clone = new Token;
00186 
00187                 clone.set (toString (false));
00188                 clone.type = type;
00189                 return clone;
00190         }
00191 
00192         /***********************************************************************
00193         
00194                 Return a reference to this token's content. Duplicate it
00195                 only if 'slice' is explicitly set to false (defaults to 
00196                 a slice instead).
00197 
00198         ***********************************************************************/
00199 
00200         char[] toString (bool slice = true)
00201         {
00202                 if (slice)
00203                     return content;
00204                 return content.dup;
00205         }
00206 
00207         /***********************************************************************
00208         
00209                 Is this token equal to another?
00210 
00211         ***********************************************************************/
00212 
00213         override int opEquals (Object o)
00214         {
00215                 Token other = cast(Token) o;
00216 
00217                 if (other is null)
00218                     return super.opEquals (o);
00219                 return typeid(char[]).equals (&content, &other.content);
00220         }
00221 
00222         /***********************************************************************
00223         
00224                 Compare this token to another.
00225 
00226         ***********************************************************************/
00227 
00228         override int opCmp (Object o)
00229         {
00230                 Token other = cast(Token) o;
00231 
00232                 if (other is null)
00233                     return super.opCmp (o);
00234 
00235                 return typeid(char[]).compare (&content, &other.content);
00236         }
00237 
00238         /***********************************************************************
00239         
00240                 Hash this token
00241 
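                Together with opCmp() above, this allows a Token to be
                used as an associative-array key. A hypothetical sketch:

                @code
                int[Token] seen;

                // clone the key so it owns its content, independent of
                // the underlying buffer
                seen[token.clone] = 1;

                assert (token in seen);
                @endcode
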
00242         ***********************************************************************/
00243 
00244         override uint toHash ()
00245         {
00246                 return typeid(char[]).getHash (&content);
00247         }
00248 
00249         /***********************************************************************
00250         
00251                 Make the Token class compatible with IWriter instances.
00252 
00253         ***********************************************************************/
00254 
00255         void write (IWriter w)
00256         {
00257                 w.put (content);
00258         }
00259 }
00260 
00261 
00262 /*******************************************************************************
00263 
00264         A style of Token that's bound to a Tokenizer. This can be a handy 
00265         means of cleaning up client code, and limiting the scope of how
00266         a token is used by receiving methods.
00267 
00268         Contrast this example with that shown in the Token class:
00269 
00270         @code
00271         // open a file for reading
00272         FileConduit fc = new FileConduit ("test.txt");
00273 
00274         // create a buffer for reading the file
00275         IBuffer buffer = fc.createBuffer;
00276 
00277         // bind a line-tokenizer to our input token
00278         BoundToken line = new BoundToken (Tokenizers.line);
00279 
00280         // read file a line at a time. Method next() returns false when no more 
00281         // delimiters are found. Note there may be an unterminated line at eof
00282         while (line.next(buffer) || line.getLength)
00283                Stdout (line) (CR);
00284         @endcode
00285 
00286         One might also consider a CompositeToken or HybridToken.
00287 
00288 *******************************************************************************/
00289 
00290 class BoundToken : Token
00291 { 
00292         private ITokenizer tk;
00293 
00294         /***********************************************************************
00295                 Construct a BoundToken using the provided Tokenizer.
00296         ***********************************************************************/
00297 
00298         this (ITokenizer tk)
00299         {
00300                 this.tk = tk;
00301         }
00302 
00303         /***********************************************************************
00304         
00305                 Return the associated tokenizer
00306 
00307         ***********************************************************************/
00308 
00309         ITokenizer getTokenizer ()
00310         {     
00311                 return tk;
00312         }
00313 
00314         /***********************************************************************
00315         
00316                 Extract the next token from the provided buffer.
00317 
00318                 Returns true if a token was isolated, false if no more 
00319                 tokens were found. Note that one last token may still
00320                 be present when this returns false; this may happen if
00321                 (for example) the last delimiter is missing before an
00322                 EOF condition is seen. Check token.getLength() when
00323                 this method returns false.
00324                 
00325                 For example:
00326 
00327                 @code
00328                         while (token.next(buffer) || token.getLength())
00329                                // do something
00330 
00331                 @endcode               
00332 
00333         ***********************************************************************/
00334 
00335         bool next (IBuffer buf)
00336         {
00337                 return tk.next (buf, this);
00338         }
00339 }
00340 
00341 
00342 /*******************************************************************************
00343 
00344         ReaderToken adapts a BoundToken such that it can be used directly
00345         with any IReader implementation. We just add the IReadable methods
00346         to the basic BoundToken.
00347 
00348         Here's a contrived example of how to use ReaderToken:
00349 
00350         @code
00351         // create a small buffer on the heap
00352         Buffer buf = new Buffer (256);
00353 
00354         // write items with a comma between each
00355         TextWriter write = new TextWriter (buf, ",");
00356 
00357         // write some stuff to the buffer
00358         write ("now is the time for all good men") (3.14159);
00359 
00360         // bind a couple of tokens to a comma tokenizer
00361         ReaderToken text = new ReaderToken (Tokenizers.comma);
00362         ReaderToken number = new ReaderToken (Tokenizers.comma);
00363         
00364         // create any old reader since we only use it for handling tokens
00365         Reader read = new Reader (buf);
00366 
00367         // populate both tokens via reader 
00368         read (text) (number);
00369 
00370         // print them to the console
00371         Stdout (text) (':') (number) (CR);
00372         @endcode
00373 
00374 *******************************************************************************/
00375 
00376 class ReaderToken : BoundToken, IReadable
00377 { 
00378         /***********************************************************************
00379         
00380                 Construct a ReaderToken using the provided Tokenizer.
00381 
00382         ***********************************************************************/
00383 
00384         this (ITokenizer tk)
00385         {
00386                 super (tk);
00387         }
00388 
00389         /***********************************************************************
00390         
00391                 Read the next delimited element into this token.
00392 
00393         ***********************************************************************/
00394 
00395         void read (IReader r)
00396         {
00397                 tk.next (r.getBuffer, this);
00398         }
00399 }
00400 
00401 
00402 /*******************************************************************************
00403 
00404         Another subclass of BoundToken that combines both a Tokenizer and
00405         an input buffer. This is simply a convenience wrapper that takes
00406         care of details that would otherwise clutter the client code.
00407 
00408         Compare this to usage of a basic Token:
00409 
00410         @code
00411         // open a file for reading
00412         FileConduit fc = new FileConduit ("test.txt");
00413 
00414         // create a Token and bind it to both the file and a line-tokenizer
00415         CompositeToken line = new CompositeToken (Tokenizers.line, fc);
00416 
00417         // read file a line at a time. Method get() returns false when no more 
00418         // tokens are found. 
00419         while (line.get)
00420                Stdout (line) (CR);
00421         @endcode
00422 
00423         You might also consider a HybridToken for further processing of
00424         token content.
00425 
00426 *******************************************************************************/
00427 
00428 class CompositeToken : BoundToken
00429 {       
00430         private IBuffer buffer;
00431 
00432         /***********************************************************************
00433         
00434                 Set this token to use the provided Tokenizer, and bind it 
00435                 to the given buffer.
00436 
00437         ***********************************************************************/
00438 
00439         this (ITokenizer tk, IBuffer buffer)
00440         {
00441                 super (tk);
00442                 this.buffer = buffer;
00443         }
00444 
00445         /***********************************************************************
00446         
00447                 Set this token to use the provided Tokenizer, and bind it 
00448                 to the buffer associated with the given conduit.
00449 
00450         ***********************************************************************/
00451 
00452         this (ITokenizer tk, IConduit conduit)
00453         {
00454                 this (tk, conduit.createBuffer);
00455         }
00456 
00457         /***********************************************************************
00458         
00459                 Return the associated buffer
00460 
00461         ***********************************************************************/
00462 
00463         IBuffer getBuffer ()
00464         {     
00465                 return buffer;
00466         }
00467 
00468         /***********************************************************************
00469 
00470                 Extract the next token. 
00471 
00472                 Returns true if a token was isolated, false if no more 
00473                 tokens were found. Note that one last token may still
00474                 be present when this returns false; this may happen if
00475                 (for example) the last delimiter is missing before an
00476                 Eof condition is seen. Check token.getLength() when
00477                 this method returns false.
00478                 
00479                 For example:
00480 
00481                 @code
00482                         while (token.next || token.getLength)
00483                                // do something
00484 
00485                 @endcode               
00486 
00487         ***********************************************************************/
00488 
00489         bool next ()
00490         {
00491                 return tk.next (buffer, this);
00492         }
00493 
00494         /***********************************************************************
00495 
00496                 Extract the next token, taking Eof into consideration.
00497                 If next() returns false, then this function will still
00498                 return true as long as there's some content available.
00499 
00500                 For example:
00501 
00502                 @code
00503                         while (token.get)
00504                                // do something
00505 
00506                 @endcode               
00507 
00508         ***********************************************************************/
00509 
00510         bool get ()
00511         {
00512                 return next || getLength;
00513         }
00514 }
00515 
00516 
00517 /*******************************************************************************
00518 
00519         A subclass of CompositeToken that combines a Tokenizer, an input buffer,
00520         and the means to bind its content to a subordinate Reader or Token. 
00521         This is another convenience wrapper that takes care of details
00522         would otherwise complicate client code.
00523 
00524         Compare this to usage of a CompositeToken:
00525 
00526         @code
00527         // open a file for reading
00528         FileConduit fc = new FileConduit ("test.txt");
00529 
00530         // create a Token and bind it to both the file and a line-tokenizer
00531         HybridToken line = new HybridToken (Tokenizers.line, fc);
00532 
00533         // now create a reader upon the token
00534         Reader input = new Reader (line.getHost);
00535 
00536         // read file a line at a time. Method get() returns false when no more 
00537         // tokens are found. 
00538         while (line.get)
00539               {
00540               int x, y;
00541                 
00542               // reader is now bound to the content of the current line
00543               input (x) (y);
00544 
00545               Stdout (x) (y) (CR);
00546               }
00547         @endcode
00548 
00549         You can use the same mechanism to bind subordinate Tokens:
00550 
00551         @code
00552         // open a file for reading
00553         FileConduit fc = new FileConduit ("test.txt");
00554 
00555         // create a Token and bind it to both the file and a line-tokenizer
00556         HybridToken line = new HybridToken (Tokenizers.line, fc);
00557 
00558         // now create a subordinate Token that splits on whitespace
00559         CompositeToken word = new CompositeToken (Tokenizers.space, line.getHost);
00560 
00561         // read file a line at a time. Method get() returns false when no more 
00562         // tokens are found. 
00563         while (line.get)
00564                // extract space delimited tokens from each line
00565                while (word.get)
00566                       Stdout (word) (CR);
00567         @endcode
00568 
00569 
00570 *******************************************************************************/
00571 
00572 class HybridToken : CompositeToken
00573 {       
00574         private IBuffer host;
00575 
00576         /***********************************************************************
00577         
00578                 Set this token to use the provided Tokenizer, and bind it 
00579                 to the given buffer.
00580 
00581         ***********************************************************************/
00582 
00583         this (ITokenizer tk, IBuffer buffer)
00584         {
00585                 super (tk, buffer);
00586 
00587                 // create the hosting IBuffer
00588                 host = buffer.create;
00589         }
00590 
00591         /***********************************************************************
00592         
00593                 Set this token to use the provided Tokenizer, and bind it 
00594                 to the buffer associated with the given conduit.
00595 
00596         ***********************************************************************/
00597 
00598         this (ITokenizer tk, IConduit conduit)
00599         {
00600                 this (tk, conduit.createBuffer);
00601         }
00602 
00603         /***********************************************************************
00604         
00605                 Return the associated host buffer. The host should be used
00606                 for purposes of binding a subordinate Token or Reader onto
00607                 the content of this token. Each call to next() will update
00608                 this content appropriately, which is also reflected within 
00609                 said host buffer.
00610 
00611                 That is, token.toString == token.getHost.toString.
00612 
00613         ***********************************************************************/
00614 
00615         IBuffer getHost ()
00616         {     
00617                 return host;
00618         }
00619 
00620         /***********************************************************************
00621 
00622                 Extract the next token. 
00623 
00624                 Returns true if a token was isolated, false if no more 
00625                 tokens were found. Note that one last token may still
00626                 be present when this returns false; this may happen if
00627                 (for example) the last delimiter is missing before an
00628                 Eof condition is seen. Check token.getLength() when
00629                 this method returns false.
00630                 
00631                 For example:
00632 
00633                 @code
00634                         while (token.next || token.getLength)
00635                                // do something
00636 
00637                 @endcode               
00638 
00639         ***********************************************************************/
00640 
00641         bool next ()
00642         {
00643                 // get the next token
00644                 bool ret = super.next;
00645 
00646                 // set host content
00647                 host.setValidContent (toString);
00648 
00649                 return ret;
00650         }
00651 }
