Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

text/Token.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Token.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032 
00033         @version        Initial version, December 2005      
00034 
00035         @author         Kris
00036 
00037 
00038 *******************************************************************************/
00039 
00040 module mango.text.Token;
00041 
00042 private import mango.text.Text;
00043 
00044 /*******************************************************************************
00045 
00046         The base class for a set of tokenizers. 
00047 
00048         There are two types of tokenizers supported ~ exclusive and 
00049         inclusive. The former are the more common kind, where a token
00050         is delimited by elements that are considered foreign. Examples
00051         include space, comma, and end-of-line delineation. Inclusive
00052         tokens are just the opposite: they look for patterns in the
00053         text that should be part of the token itself ~ everything else
00054         is considered foreign. Currently the only inclusive token type
00055         is exposed by RegexToken; everything else is of the exclusive
00056         variety.
00057 
00058         The content provided to Tokenizers is supposed to be entirely
00059         read-only. All current tokenizers abide by this rule, but it's
00060         possible a user could mutate the content through a token slice.
00061         To enforce the desired read-only aspect, the code would have to 
00062         introduce redundant copying or the compiler would have to support 
00063         read-only arrays.
00064 
00065         See LineToken, CharToken, RegexToken, QuotedToken, and SetToken.
00066 
00067 *******************************************************************************/
00068 
00069 class TokenTemplate(T)
00070 {
00071         static if (!is (T == char) && !is (T == wchar) && !is (T == dchar)) 
00072                     pragma (msg, "Template type must be char, wchar, or dchar");
00073         // NOTE: the check above only prints a message while compiling
00074         // shorthand for this class, used in the signatures below
00075         alias TokenTemplate Token;
00076         // callback asked for more content; true means some was supplied
00077         alias bool delegate(Token token) Refill;
00078 
00079         protected T*            peek,           // current position
00080                                 last,           // prior position
00081                                 end;            // end of content
00082         package   T[]           slice,          // current token slice
00083                                 content;        // the content to tokenize
00084         package   bool          hasTail,        // set when getMore() leaves a tail
00085                                 autoTrim;       // trim tokens by default?
00086         package   Refill        refill;         // where to get new content
00087 
00088         /***********************************************************************
00089 
00090                 Locate the next token. If this Token is configured for
00091                 "refills", more content will be requested as needed.
00092 
00093                 Returns true if a token is found; false otherwise.
00094 
00095         ***********************************************************************/
00096 
00097         abstract bool next();
00098 
00099         /***********************************************************************
00100 
00101                 Construct a token upon the given content. Automatic
00102                 refills are disabled: the refill delegate is set to
00103                 noRefill(), which always reports no further content.
00104         ***********************************************************************/
00105 
00106         this (T[] string)
00107         {
00108                 prime (string);
00109                 refill = &this.noRefill;
00110         }
00111 
00112         /**********************************************************************
00113                 Iterate over the set of tokens, calling next() until it
00114                 returns false. Each token is passed to the delegate as a
00115                 local copy of the slice, so rebinding it does not affect
00116                 this Token. A non-zero delegate result ends the loop early.
00117         **********************************************************************/
00118 
00119         int opApply (int delegate(inout T[]) dg)
00120         {
00121                 int result = 0;
00122 
00123                 while (next)
00124                       {
00125                       T[] t = get ();
00126                       result = dg (t);
00127                       if (result)
00128                           break;
00129                       }
00130                 return result;
00131         }
00132 
00133         /***********************************************************************
00134                 Enable or disable automatic trimming. When enabled, each
00135                 slice set by found() or getMore() is passed through trim()
00136 
00137         ***********************************************************************/
00138 
00139         void setAutoTrim (bool enabled)
00140         {
00141                 autoTrim = enabled;
00142         }
00143 
00144         /***********************************************************************
00145                 Set the delegate that getMore() invokes to request more
00146                 content. A Refill delegate is expected to carry over the
00147                 tail of the current text. Doing so will cause the tail to
00148                 be prepended to the next 'found' token, after the content
00149                 is primed to reference fresh incoming data. Thus, a Refill
00150                 should do this:
00151 
00152                 // copy token.tail() into buffer
00153 
00154                 // append fresh content into buffer
00155                 // ...
00156 
00157                 token.prime (buffer);
00158                 return true;
00159                 (or return false when there is no more content to supply)
00160         ***********************************************************************/
00161 
00162         void setRefill (Refill refill)
00163         {
00164                 this.refill = refill;
00165         }
00166 
00167         /***********************************************************************
00168                 Set the content, ready for next() to start. Resets last to
00169                 the start of content, peek to one element before it, and
00170                 end to one past the final element. Returns the content.
00171         ***********************************************************************/
00172 
00173         T[] prime (T[] content)
00174         {
00175                 this.content = content;
00176                 peek = (last = content.ptr) - 1;
00177                 end = last + content.length;
00178                 return content;
00179         }
00180 
00181         /***********************************************************************
00182 
00183                 Return the current token as a slice of the content
00184 
00185         ***********************************************************************/
00186 
00187         T[] get()
00188         {
00189                 return slice;
00190         }
00191 
00192         /***********************************************************************
00193                 Return the current content-tail: everything from the prior
00194                 position (last) up to the end of content. This is typically
00195                 used by "refill" handlers, when they stream more content in.
00196 
00197         ***********************************************************************/
00198 
00199         T[] tail()
00200         {
00201                 return last [0 .. end-last];
00202         }
00203 
00204         /***********************************************************************
00205                 Return how much content has been consumed: the number of
00206                 elements between the start of content and last
00207 
00208         ***********************************************************************/
00209 
00210         uint eaten()
00211         {
00212                 return last - content.ptr;
00213         }
00214 
00215         /***********************************************************************
00216 
00217                 Return the index of the current token, i.e. the offset of
00218                 slice within content. This is different from eaten() in
00219                 that the current token may not yet have been consumed.
00220                 Thus index() will always be less than or equal to eaten()
00221 
00222         ***********************************************************************/
00223 
00224         uint index()
00225         {
00226                 return slice.ptr - content.ptr;
00227         }
00228 
00229         /***********************************************************************
00230                 Trim spaces from the left and right of the current token,
00231                 using TextTemplate!(T).trim. Note that this is done
00232                 in-place on the current slice. The content itself is not
00233                 affected. Returns this Token.
00234 
00235         ***********************************************************************/
00236 
00237         Token trim ()
00238         {
00239                 slice = TextTemplate!(T).trim (slice);
00240                 return this;
00241         }
00242 
00243         /***********************************************************************
00244                 Internal method for subclasses to call when they locate a
00245                 token. The slice runs from last up to (peek - offset), after
00246                 which last moves one past peek. The slice is trimmed when
00247                 autoTrim is set. Always returns true.
00248         ***********************************************************************/
00249 
00250         protected bool found (int offset = 0)
00251         {
00252                 slice = last [0 .. (peek - offset) - last];
00253                 last = peek + 1;
00254                 if (autoTrim)
00255                     trim ();
00256                 return true;
00257         }
00258 
00259         /***********************************************************************
00260                 Internal method for subclasses to call when they run out
00261                 of content to scan. This invokes the "refill" facilities
00262                 to provide additional content, and returns true if they
00263                 did. Otherwise returns false; hasTail then tells whether
00264                 a final slice (the remaining tail) was left in place.
00265         ***********************************************************************/
00266 
00267         protected bool getMore ()
00268         {
00269                 hasTail = false;
00270                 if (last < end || content.ptr is null) // unconsumed content, or none set
00271                    {
00272                    // ask the refill delegate; true means fresh content is ready
00273                    if (refill (this))
00274                        return true;
00275 
00276                    // no refill: expose the remaining tail as the last slice
00277                    hasTail = true;
00278                    slice = tail;
00279                    if (autoTrim)
00280                        trim();
00281                    last = end; // everything now counts as consumed
00282                    }
00283                 return false;
00284         }
00285 
00286         /***********************************************************************
00287 
00288                 Default "refill" handler, which indicates there's no more
00289                 content to be had. Installed by the constructor; always
00290                 returns false and ignores its argument.
00291         ***********************************************************************/
00292 
00293         private bool noRefill (Token token)
00294         {
00295                 return false;
00296         }
00297 }
00298 
00299 //alias TokenTemplate!(char) Token;

Generated on Sat Dec 24 17:28:34 2005 for Mango by  doxygen 1.4.0