00001 /******************************************************************************* 00002 00003 @file TextReader.d 00004 00005 Copyright (c) 2004 Kris Bell 00006 00007 This software is provided 'as-is', without any express or implied 00008 warranty. In no event will the authors be held liable for damages 00009 of any kind arising from the use of this software. 00010 00011 Permission is hereby granted to anyone to use this software for any 00012 purpose, including commercial applications, and to alter it and/or 00013 redistribute it freely, subject to the following restrictions: 00014 00015 1. The origin of this software must not be misrepresented; you must 00016 not claim that you wrote the original software. If you use this 00017 software in a product, an acknowledgment within documentation of 00018 said product would be appreciated but is not required. 00019 00020 2. Altered source versions must be plainly marked as such, and must 00021 not be misrepresented as being the original software. 00022 00023 3. This notice may not be removed or altered from any distribution 00024 of the source. 00025 00026 4. Derivative works are permitted, but they must carry this notice 00027 in full and credit the original source. 00028 00029 00030 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00031 00032 00033 @version Initial version, March 2004 00034 @author Kris 00035 00036 00037 *******************************************************************************/ 00038 00039 module mango.io.TextReader; 00040 00041 private import mango.convert.Type; 00042 00043 private import mango.io.Token, 00044 mango.io.Reader, 00045 mango.io.Tokenizer, 00046 mango.io.Exception; 00047 00048 /******************************************************************************* 00049 00050 Grok readable input from a stream. All input is tokenized from the 00051 associated buffer, and converted as necessary into the destination 00052 location. 
A line tokenizer (Tokenizers.line) is used by default — see the
        constructor default arguments — but you can choose an alternative,
        such as a comma-delimiting tokenizer.

*******************************************************************************/

class TextReader : Reader
{
        alias Reader.get get;

        // the most recently extracted token; allocated once and reused
        private Token token;

        // slices the buffer content into tokens on our behalf
        private ITokenizer tokenizer;

        // message raised when a token is requested but none remains
        private final static char[] EOI = "unexpected end of input";

        /***********************************************************************

                Construct a TextReader on the provided buffer, using the
                specified Tokenizer instead of the default one.

        ***********************************************************************/

        this (IBuffer buffer, ITokenizer tokenizer = Tokenizers.line)
        {
                super (buffer);
                this.token = new Token;
                this.tokenizer = tokenizer;
        }

        /***********************************************************************

                Construct a TextReader upon the buffer associated with the
                given conduit.

        ***********************************************************************/

        this (IConduit conduit, ITokenizer tokenizer = Tokenizers.line)
        {
                this (new Buffer(conduit), tokenizer);
        }

        /***********************************************************************

                Tokenize input and convert it into the elements at 'src'.
                'bytes' is the total number of bytes to populate and 'type'
                is one of the Type.* discriminators, identifying the element
                type at the destination.

                One token is consumed per element. Returns the number of
                bytes originally requested. Throws TokenException when the
                input is exhausted, when a token is empty, or when an
                unrecognized type is handed in.

        ***********************************************************************/

        override uint read (void* src, uint bytes, uint type)
        {
                uint length = bytes;

                // get width of elements (note: does not work for bit[])
                int width = Type.widths[type];

                // convert one element per token. Loop while a whole element
                // still fits: testing 'bytes >= width' (rather than 'bytes')
                // ensures the unsigned subtraction below cannot wrap around
                // when 'bytes' is not an exact multiple of the element width
                while (bytes >= width)
                      {
                      switch (type)
                             {
                             case Type.Bool:
                                  // anything other than "true" becomes false
                                  *cast(bool*) src = cast(bool) (next.toString() == "true");
                                  break;

                             case Type.Byte:
                             case Type.UByte:
                                  *cast(ubyte*) src = cast(ubyte) next.toInt();
                                  break;

                             case Type.Short:
                             case Type.UShort:
                                  *cast(ushort*) src = cast(ushort) next.toInt();
                                  break;

                             case Type.Int:
                             case Type.UInt:
                                  *cast(uint*) src = next.toInt();
                                  break;

                             case Type.Long:
                             case Type.ULong:
                                  *cast(long*) src = next.toLong();
                                  break;

                             case Type.Float:
                                  *cast(float*) src = next.toReal();
                                  break;

                             case Type.Double:
                                  *cast(double*) src = next.toReal();
                                  break;

                             case Type.Real:
                                  *cast(real*) src = next.toReal();
                                  break;

                             default:
                                  error ("unknown type handed to TextReader.read()");
                             }

                      // bump counters and loop around for next instance
                      bytes -= width;
                      src += width;
                      }

                return length;
        }

        /***********************************************************************

                Extract the next token as a char[]. The content is duplicated
                so the caller is insulated from subsequent buffer mutation.

        ***********************************************************************/

        override IReader get (inout char[] x, uint elements = uint.max)
        {
                // dup the input string, to avoid surprises
                x = nextToken.toString (false);
                return this;
        }

        /***********************************************************************

                Not yet implemented.

                @todo - Tokenizer needs to handle wchar[] before this will
                operate correctly

        ***********************************************************************/

        override IReader get (inout wchar[] x, uint elements = uint.max)
        {
                assert (0);
        }

        /***********************************************************************

                Not yet implemented.

                @todo - Tokenizer needs to handle dchar[] before this will
                operate correctly

        ***********************************************************************/

        override IReader get (inout dchar[] x, uint elements = uint.max)
        {
                assert (0);
        }

        /***********************************************************************

                Throw an exception if the input requested is not available.
                Reader instances expect the input to be available, so you
                might think of them as being applied to a fixed-format
                file. Tokenizers, on the other hand, are more flexible in
                that they return true until no more tokens are available.

                Readers take care of assignment to user-space variables,
                whereas Tokenizers just make the raw content available.

        ***********************************************************************/

        private static final void error (char[] msg = EOI)
        {
                throw new TokenException (msg);
        }

        /***********************************************************************

                Internal method to capture the next token. Throws a
                TokenException (via error) when no token is available.

        ***********************************************************************/

        private final Token nextToken ()
        {
                if (! tokenizer.next (buffer, token))
                      error ();
                return token;
        }

        /***********************************************************************

                Internal method to isolate the next token, and check its
                length. This is used when converting to non-char types,
                such as integers ~ an empty token is illegal, since we
                don't support default values.

                @todo: should the empty-token rule be relaxed?

        ***********************************************************************/

        private final Token next ()
        {
                nextToken();
                if (token.getLength == 0)
                      error ();
                return token;
        }

        /***********************************************************************

                Is this Reader text oriented? Always true for TextReader.

        ***********************************************************************/

        bool isTextBased()
        {
                return true;
        }
}