00001 /******************************************************************************* 00002 00003 @file TextReader.d 00004 00005 Copyright (c) 2004 Kris Bell 00006 00007 This software is provided 'as-is', without any express or implied 00008 warranty. In no event will the authors be held liable for damages 00009 of any kind arising from the use of this software. 00010 00011 Permission is hereby granted to anyone to use this software for any 00012 purpose, including commercial applications, and to alter it and/or 00013 redistribute it freely, subject to the following restrictions: 00014 00015 1. The origin of this software must not be misrepresented; you must 00016 not claim that you wrote the original software. If you use this 00017 software in a product, an acknowledgment within documentation of 00018 said product would be appreciated but is not required. 00019 00020 2. Altered source versions must be plainly marked as such, and must 00021 not be misrepresented as being the original software. 00022 00023 3. This notice may not be removed or altered from any distribution 00024 of the source. 00025 00026 4. Derivative works are permitted, but they must carry this notice 00027 in full and credit the original source. 00028 00029 00030 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00031 00032 00033 @version Initial version, March 2004 00034 @author Kris 00035 00036 00037 *******************************************************************************/ 00038 00039 module mango.io.TextReader; 00040 00041 private import mango.io.Reader, 00042 mango.io.Exception; 00043 00044 private import mango.text.Token; 00045 00046 private import mango.convert.Type, 00047 mango.convert.Atoi, 00048 mango.convert.Double; 00049 00050 /******************************************************************************* 00051 00052 Grok readable input from a stream. All input is tokenized from the 00053 associated buffer, and converted as necessary into the destination 00054 location. 
A SpaceTokenizer is used by default, but you can choose
        an alternative (such as a comma-delimiting tokenizer).

*******************************************************************************/

class TextReaderTemplate(T) : Reader
{
        // text -> floating point converter for character type T
        private DoubleTemplate!(T)      dbl;

        // text -> integer converter for character type T
        private AtoiTemplate!(T)        atoi;

        // tokenizer used to carve the buffer content into tokens
        private TokenTemplate!(T)       token;

        /***********************************************************************

                Construct a TextReader on the provided buffer, using the
                specified Tokenizer instead of the default one.

                The tokenizer is handed our refill() method so that it
                can request more content from the buffer when it runs
                out of input.

        ***********************************************************************/

        this (IBuffer buffer, TokenTemplate!(T) token)
        {
                super (buffer);
                this.token = token;
                token.setRefill (&refill);
        }

        /***********************************************************************

                Construct a TextReader upon the buffer associated with the
                given conduit. A new Buffer is created around the conduit
                and passed to the buffer-based constructor.

        ***********************************************************************/

        this (IConduit conduit, TokenTemplate!(T) token)
        {
                this (new Buffer(conduit), token);
        }

        /***********************************************************************

                Is this Reader text oriented? Always true for a TextReader.

        ***********************************************************************/

        bool isTextBased()
        {
                return true;
        }

        /***********************************************************************

                Convert a sequence of tokens into binary values, storing
                them consecutively at the given location. One token is
                consumed per element.

                @param src      destination for the converted values (the
                                name is historic: this is written to, not
                                read from)
                @param bytes    size of the destination in bytes; must be
                                a whole multiple of the element width
                @param type     one of the Type constants, selecting both
                                the conversion and the element width

                @return the number of bytes populated (equal to 'bytes')

                Throws a TokenException when the input is exhausted, when
                a token is empty, when the type is not handled here, or
                when 'bytes' does not describe a whole number of elements.

                Bool elements are set true only when the token is exactly
                "true"; integer types are converted via atoi, and floating
                point types via dbl.

        ***********************************************************************/

        override uint read (void* src, uint bytes, uint type)
        {
                // get width of elements (note: does not work for bit[])
                int width = Type.widths[type];

                // nothing requested: consume no tokens at all
                if (bytes == 0)
                    return 0;

                // a zero width would never advance through the destination
                if (width <= 0)
                    error ("unsupported element width handed to TextReader.read()");

                // 'bytes' is unsigned: a partial trailing element would make
                // a "bytes -= width" countdown wrap around rather than reach
                // zero, writing beyond the destination. Reject it up front.
                uint step = cast(uint) width;
                if (bytes % step)
                    error ("byte count handed to TextReader.read() is not a multiple of the element width");

                uint elements = bytes / step;

                // for each element in the destination ...
                for (uint i = 0; i < elements; ++i)
                {
                        T[] t = next.get ();

                        switch (type)
                               {
                               case Type.Bool:
                                    *cast(bool*) src = cast(bool) (t == "true");
                                    break;

                               // signed and unsigned variants share a width,
                               // so the same store is used for both
                               case Type.Byte:
                               case Type.UByte:
                                    *cast(ubyte*) src = cast(ubyte) atoi.parse (t);
                                    break;

                               case Type.Short:
                               case Type.UShort:
                                    *cast(ushort*) src = cast(ushort) atoi.parse (t);
                                    break;

                               case Type.Int:
                               case Type.UInt:
                                    *cast(uint*) src = cast(uint) atoi.parse (t);
                                    break;

                               case Type.Long:
                               case Type.ULong:
                                    *cast(long*) src = atoi.parse (t);
                                    break;

                               case Type.Float:
                                    *cast(float*) src = dbl.parse (t);
                                    break;

                               case Type.Double:
                                    *cast(double*) src = dbl.parse (t);
                                    break;

                               case Type.Real:
                                    *cast(real*) src = dbl.parse (t);
                                    break;

                               default:
                                    error ("unknown type handed to TextReader.read()");
                               }

                        // step to the next element slot
                        src += step;
                }

                return bytes;
        }

        /***********************************************************************

                Intercept single char decoding

                Isolates the next (non-empty) token, moves the buffer
                position to the tokenizer's index, and then defers to
                the superclass decoder.

                NOTE(review): token.index is presumably the tokenizer's
                current offset within the buffer content ~ confirm
                against TokenTemplate.

        ***********************************************************************/

        protected override IReader decode (void* dst, uint bytes, uint type)
        {
                next();
                buffer.skip (token.index - buffer.getPosition);
                return super.decode (dst, bytes, type);
        }

        /***********************************************************************

                Intercept char-array decoding

                Isolates the next token (which may be empty), moves the
                buffer position to the tokenizer's index, and returns
                the length of that token. The 'elements' argument is
                not consulted.

        ***********************************************************************/

        protected override uint count (uint elements)
        {
                nextToken();
                buffer.skip (token.index - buffer.getPosition);
                return token.get.length;
        }

        /***********************************************************************

                Internal method to capture the next token. The token is
                trimmed before being returned.

                Throws a TokenException when no further token is
                available.

        ***********************************************************************/

        private final TokenTemplate!(T) nextToken ()
        {
                if (! token.next)
                      error ("unexpected end of input");
                token.trim ();
                return token;
        }

        /***********************************************************************

                Internal method to isolate the next token, and check its
                length. This is used when converting to non-char types,
                such as integers ~ an empty token is illegal, since we
                don't support default values.

                Throws a TokenException for an empty token, or when the
                input is exhausted.

                @todo: should the empty-token rule be relaxed?

        ***********************************************************************/

        private final TokenTemplate!(T) next ()
        {
                if (nextToken.get.length == 0)
                    error ("attempt to parse an invalid empty token");
                return token;
        }

        /***********************************************************************

                Throw an exception if the input requested is not available.
                Reader instances expect the input to be available, so you
                might think of them as being applied to a fixed-format
                file. Tokenizers, on the other hand, are more flexible in
                that they return true until no more tokens are available.

                Readers take care of assignment to user-space variables,
                whereas Tokenizers just make the raw content available.

                Always throws a TokenException carrying the given message.

        ***********************************************************************/

        private static final void error (char[] msg)
        {
                throw new TokenException (msg);
        }

        /***********************************************************************

                Refill the token content from our buffer. Returns false
                upon reaching EOF

                The buffer position is first moved to the tokenizer's
                'eaten' mark. When the buffer has a conduit attached it
                is asked to fill itself; otherwise we simply report
                whether readable content remains. In both cases the
                tokenizer is re-primed with the current buffer content.

        ***********************************************************************/

        private bool refill (TokenTemplate!(T) token)
        {
                bool more;

                buffer.skip (token.eaten - buffer.getPosition);
                if (buffer.getConduit)
                    more = cast(bool) (buffer.fill() != IConduit.Eof);
                else
                   more = cast(bool) (buffer.readable > 0);
                token.prime (buffer.toString);
                return more;
        }
}


// convenience alias
alias TextReaderTemplate!(char) TextReader;