/*******************************************************************************

        @file UMango.d

        Copyright (c) 2004 Kris Bell

        This software is provided 'as-is', without any express or implied
        warranty. In no event will the authors be held liable for damages
        of any kind arising from the use of this software.

        Permission is hereby granted to anyone to use this software for any
        purpose, including commercial applications, and to alter it and/or
        redistribute it freely, subject to the following restrictions:

        1. The origin of this software must not be misrepresented; you must
           not claim that you wrote the original software. If you use this
           software in a product, an acknowledgment within documentation of
           said product would be appreciated but is not required.

        2. Altered source versions must be plainly marked as such, and must
           not be misrepresented as being the original software.

        3. This notice may not be removed or altered from any distribution
           of the source.

        4. Derivative works are permitted, but they must carry this notice
           in full and credit the original source.


                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


        @version        Initial version, October 2004
        @author         Kris

*******************************************************************************/

/*******************************************************************************

        Altered source version (see clause 2 of the notice above). Changes
        relative to the original:

        - StringDecoder16 asserts that its UConverter is non-null
        - StringDecoder16.read() returns immediately when handed an empty
          destination
        - documentation corrected and extended

*******************************************************************************/

module mango.icu.UMango;

public import mango.icu.UConverter;

/*******************************************************************************

        Include these classes when compiled with the Mango.io package.
        They represent the 'glue' to bind said package to the unicode
        converters provided by ICU.

*******************************************************************************/

version (Isolated){}
else
{
        private import mango.io.model.IReader;
        private import mango.io.model.IWriter;

        /***********************************************************************

                Abstract base class for String decoders. These decoders
                bind the ICU functionality to the Mango.io package, and
                provide some utility functions such as input streaming.

                These decoder classes will always attempt to fill their
                destination (provided) output array, but may terminate
                early if (a) a defined read 'limit' on the input stream
                has been reached or (b) a partial surrogate-pair would
                be left at the output tail. Each decoder returns a count
                of how many output elements were actually converted.

        ***********************************************************************/

        class StringDecoder : IReadable, IDecoder
        {
                // converter used to perform the actual decoding
                private UConverter      cvt;

                // set by the conversion delegate to terminate decode()
                private bool            done;

                // buffer of the IReader this decoder was last bound to
                private IBuffer         bound;

                // remaining input bytes permitted; uint.max means unlimited
                private uint            limit = uint.max;

                /***************************************************************

                        Decoders can be used to convert directly into a
                        provided destination. The converter will try to
                        fill the destination, up to the configured input
                        'limit', and returns the number of elements thus
                        converted. This returned value will be less than
                        the destination capacity when either the 'limit'
                        was reached, or when a partial surrogate would
                        be placed at the tail.

                ***************************************************************/

                abstract uint read (IBuffer b, wchar[] dst);

                /***************************************************************

                        Signature for BufferDecoder handlers. These
                        decoders are intended to be usable as the
                        default handlers within the reader constructs.
                        Use IReader.setDecoder() to set a decoder as
                        the default handler.

                ***************************************************************/

                abstract uint decoder (void* p, uint capacity, uint type);

                /***************************************************************

                        Return the type of this decoder

                ***************************************************************/

                abstract ConverterType type ();

                /***************************************************************

                        Set the limit for this decoder. This will cause
                        the decoder to halt after reading the specified
                        number of bytes from its input. The decoder may
                        also halt before that point if the destination
                        becomes full. Method toGo() reports how many of
                        those bytes remain to be read.

                ***************************************************************/

                void setLimit (uint limit)
                {
                        this.limit = limit;
                }

                /***************************************************************

                        Change the converter used for this decoder. The
                        converter must not be null.

                ***************************************************************/

                void setConverter (UConverter cvt)
                in {
                   assert (cvt);
                   }
                body
                {
                        this.cvt = cvt;
                }

                /***************************************************************

                        Reset the converter and the input limit. The latter
                        defaults to being unlimited, causing the decoder to
                        read until the destination is full.

                ***************************************************************/

                void reset (uint limit = uint.max)
                {
                        setLimit (limit);
                        cvt.reset ();
                }

                /***************************************************************

                        Return the number of bytes yet to be read. This
                        is the current value of the input limit, and is
                        therefore uint.max while no limit is active.

                ***************************************************************/

                protected uint toGo ()
                {
                        return limit;
                }

                /***************************************************************

                        Placeholder for subclasses to do something useful
                        when applied to an IReader. See UString for an
                        example of such usage.

                ***************************************************************/

                protected void read (IReader r)
                {
                }

                /***************************************************************

                        Bind this StringDecoder to the specified IReader.
                        This is invoked by an IReader to install it as the
                        default handler, and thus be used by all subsequent
                        IReader.get() requests for the subclass type.

                        Note that the byte limit will be respected if 'limit'
                        has been set, which can be useful when converting an
                        unknown number of elements (a la HTTP).

                ***************************************************************/

                final BufferDecoder bind (IReader reader)
                {
                        // remember the reader's buffer; decoder() pulls
                        // its input from there
                        bound = reader.getBuffer ();
                        return &decoder;
                }

                /***************************************************************

                        Decode IBuffer input until the delegate indicates
                        it is finished. Typically, that occurs when either
                        the destination is full, or the input 'limit' has
                        been reached.

                        The delegate is expected to set 'done' and adjust
                        'limit' as it consumes input, and to return the
                        number of bytes it consumed.

                ***************************************************************/

                private final void decode (IBuffer buffer, int delegate (void[]) dg)
                {
                        done = false;
                        while (limit && !done)
                              {
                              // NOTE(review): presumably ensures at least
                              // one byte is available without consuming
                              // it -- confirm against IBuffer.get()
                              buffer.get (1, false);
                              buffer.read (dg);
                              }
                }
        }


        /***********************************************************************

                Decode a byte stream into UTF16 wchars. This decoder can:

                - be used as the default wchar handler when attached to
                  an IReader (see IReader.setDecoder).

                - be used directly to fill a provided destination array
                  with converted wchars.

                - be used in either of the prior two cases with a 'limit'
                  placed upon the number of input bytes converted (in
                  addition to the destination capacity limit). This can
                  be useful when the number of raw bytes is known, but
                  the number of wchar elements is not, and can be handy
                  for streaming conversions.

        ***********************************************************************/

        class StringDecoder16 : StringDecoder
        {
                /***************************************************************

                        Construct a decoder with the given UConverter, and
                        an optional 'limit' to the number of input bytes to
                        be converted. The converter must not be null.

                ***************************************************************/

                this (UConverter cvt, uint limit = uint.max)
                {
                        // same precondition as setConverter(); reset()
                        // below dereferences the converter immediately
                        assert (cvt);

                        this.cvt = cvt;
                        super.reset (limit);
                }

                /***************************************************************

                        Construct a decoder of the given specification, and
                        an optional 'limit' to the number of input bytes to
                        be converted.

                ***************************************************************/

                this (char[] type, uint limit = uint.max)
                {
                        this (new UConverter (type), limit);
                }

                /***************************************************************

                        Return the type of this decoder

                ***************************************************************/

                ConverterType type ()
                {
                        return ConverterType.WChar;
                }

                /***************************************************************

                        Signature for BufferDecoder handlers. These
                        decoders are intended to be usable as the
                        default handlers within the reader constructs.
                        Use IReader.setDecoder() to set a decoder as
                        the default handler.

                        'capacity' is treated as a byte count, and the
                        returned value is likewise the number of bytes
                        (not wchars) placed at 'p'. The 'type' argument
                        is not consulted.

                ***************************************************************/

                protected uint decoder (void* p, uint capacity, uint type)
                {
                        // this ugly conversion/casting back and forth is
                        // a lot more efficient than the intrinsic array
                        // conversion generated via an array[] cast
                        return read (bound, (cast(wchar*) p)[0..capacity / wchar.sizeof]) * wchar.sizeof;
                }

                /***************************************************************

                        Decoders can be used to convert directly into a
                        provided destination. The converter will try to
                        fill the destination, up to the configured input
                        'limit', and returns the number of elements thus
                        converted. This returned value will be less than
                        the destination capacity when either the 'limit'
                        was reached, or when a partial surrogate would
                        have been placed at the tail.

                        An empty destination yields zero immediately,
                        without touching the input buffer.

                ***************************************************************/

                final uint read (IBuffer buffer, wchar[] dst)
                {
                        uint produced;

                        // nothing can be stored in an empty destination,
                        // so do not touch (or wait upon) the input at all
                        if (dst.length == 0)
                            return 0;

                        // invoked by the buffer with its readable content;
                        // returns the number of input bytes consumed
                        int fill (void[] x)
                        {
                                UAdjust adj;
                                uint    len = x.length;

                                // have we read enough from the source?
                                if (len > limit)
                                    len = limit;

                                // do the conversion; test for overflow.
                                // There's an issue here with certain
                                // conversion types (e.g. utf7) where byte
                                // combinations appear ambiguous. It is
                                // possible that the converter will cache
                                // such combinations until it determines
                                // the result from subsequent input data.
                                // However, if such a condition occurs at
                                // the tail end of an input stream, the
                                // conversion may stall whilst waiting on
                                // more input. There does not appear to
                                // be a means of identifying whether or
                                // not content has been cached, so there
                                // is little one can do at this time ...
                                // Note that this issue does not exist
                                // when 'limit' is active
                                done = cvt.decode (x[0..len], dst[produced..length], adj, len == 0);

                                // adjust output. Note that we always clip
                                // the bytes read to match the output size
                                if ((produced += adj.output) >= dst.length)
                                     done = true;

                                // are we limiting input?
                                if (limit != uint.max)
                                    limit -= adj.input;

                                // say how much we consumed
                                return adj.input;
                        }

                        decode (buffer, &fill);
                        return produced;
                }
        }



        /***********************************************************************

                Base class for String encoders. These encoders bind the
                ICU converters to the Mango.io package on the output
                side: a subclass supplies a conversion delegate, and
                this class drives that delegate against an IBuffer
                until the delegate reports there is nothing more to do.

        ***********************************************************************/

        class StringEncoder : IEncoder
        {
                // set by the conversion delegate; encode() loops while true
                private bool            more;

                // buffer of the IWriter this encoder was last bound to
                private IBuffer         bound;

                /***************************************************************

                        Reset the underlying converter

                ***************************************************************/

                abstract void reset ();

                /***************************************************************

                        Return the type of this encoder

                ***************************************************************/

                abstract ConverterType type ();

                /***************************************************************

                        Signature for BufferEncoder handlers; this is
                        the delegate handed out by bind().

                ***************************************************************/

                abstract void encoder (void* p, uint count, int type);

                /***************************************************************

                        Bind this StringEncoder to the specified IWriter.
                        This is invoked by an IWriter to install it as the
                        default handler, and thus be used by all subsequent
                        IWriter.put() requests for the subclass type.

                ***************************************************************/

                final BufferEncoder bind (IWriter w)
                {
                        // remember the writer's buffer; encoder() sends
                        // its output there
                        bound = w.getBuffer ();
                        return &encoder;
                }

                /***************************************************************

                        Repeatedly hand the buffer's writable space to
                        the delegate until the delegate clears 'more'.
                        The buffer is flushed beforehand whenever it
                        reports no writable space.

                ***************************************************************/

                private final void encode (IBuffer b, int delegate (void[]) dg)
                {
                        more = true;
                        while (more)
                              {
                              if (! b.writable)
                                    b.flush ();
                              b.write (dg);
                              }
                }
        }


        /***********************************************************************

                Encode char[] content, held in a configurable source
                encoding (utf8 by default), into the encoding of the
                given UConverter.

        ***********************************************************************/

        class StringEncoder8 : StringEncoder
        {
                // transcoder from the source encoding to the target
                private ITranscoder     xcode;

                /***************************************************************

                        Construct an encoder for the given UConverter,
                        where the source-content encoding is specified
                        by 'source'.

                        The default source-encoding is assumed to be utf8.

                ***************************************************************/

                this (UConverter cvt, char[] source = "utf8")
                {
                        xcode = (new UConverter(source)).createTranscoder (cvt);
                }

                /***************************************************************

                        Construct an encoder of the given output 'type',
                        where the source-content encoding is specified
                        by 'source'.

                        The default source-encoding is assumed to be utf8.

                ***************************************************************/

                this (char[] type, char[] source = "utf8")
                {
                        this (new UConverter(type), source);
                }

                /***************************************************************

                        Transcode the content of 'c' into buffer 'b'.
                        The conversion is requested to flush once the
                        remaining input is empty.

                ***************************************************************/

                void encode (IBuffer b, char[] c)
                {
                        // invoked by the buffer with its writable space;
                        // returns the number of bytes placed there
                        int write (void[] x)
                        {
                                UAdjust adj;

                                // NOTE(review): the result is taken to
                                // mean "output space exhausted, call
                                // again" -- confirm against ITranscoder
                                more = xcode.convert (c, x, adj, c.length == 0);

                                // drop the input consumed thus far
                                c = c[adj.input..length];
                                return adj.output;
                        }

                        super.encode (b, &write);
                }

                /***************************************************************

                        BufferEncoder handler: encode 'count' bytes of
                        char content at 'p' into the bound buffer. The
                        'type' argument is not consulted.

                ***************************************************************/

                protected void encoder (void* p, uint count, int type)
                {
                        encode (bound, (cast(char*) p)[0..count/char.sizeof]);
                }

                /***************************************************************

                        Return the type of this encoder

                ***************************************************************/

                ConverterType type ()
                {
                        return ConverterType.Char;
                }

                /***************************************************************

                        Reset the underlying transcoder

                ***************************************************************/

                void reset ()
                {
                        xcode.reset();
                }
        }


        /***********************************************************************

                Encode wchar[] content into the encoding of the given
                UConverter.

        ***********************************************************************/

        class StringEncoder16 : StringEncoder
        {
                // converter for the target encoding
                private UConverter      cvt;

                /***************************************************************

                        Construct an encoder for the given UConverter.

                ***************************************************************/

                this (UConverter cvt)
                {
                        this.cvt = cvt;
                }

                /***************************************************************

                        Construct an encoder of the given output 'type'.

                        The source-encoding is assumed to be utf16.

                ***************************************************************/

                this (char[] type)
                {
                        this (new UConverter(type));
                }

                /***************************************************************

                        Encode the content of 'w' into buffer 'b'. The
                        conversion is requested to flush once the
                        remaining input is empty.

                ***************************************************************/

                void encode (IBuffer b, wchar[] w)
                {
                        // invoked by the buffer with its writable space;
                        // returns the number of bytes placed there
                        int write (void[] x)
                        {
                                UAdjust adj;

                                // NOTE(review): the result is taken to
                                // mean "output space exhausted, call
                                // again" -- confirm against UConverter
                                more = cvt.encode (w, x, adj, w.length == 0);

                                // drop the input consumed thus far
                                w = w[adj.input..length];
                                return adj.output;
                        }

                        super.encode (b, &write);
                }

                /***************************************************************

                        BufferEncoder handler: encode 'count' bytes of
                        wchar content at 'p' into the bound buffer. The
                        'type' argument is not consulted.

                ***************************************************************/

                protected void encoder (void* p, uint count, int type)
                {
                        encode (bound, (cast(wchar*) p)[0..count/wchar.sizeof]);
                }

                /***************************************************************

                        Return the type of this encoder

                ***************************************************************/

                ConverterType type ()
                {
                        return ConverterType.WChar;
                }

                /***************************************************************

                        Reset the underlying converter

                ***************************************************************/

                void reset ()
                {
                        cvt.reset();
                }
        }


        /***********************************************************************

                Encode dchar[] content into the encoding of the given
                UConverter, by way of a utf32 transcoder.

        ***********************************************************************/

        class StringEncoder32 : StringEncoder
        {
                // transcoder from utf32 to the target encoding
                private ITranscoder     xcode;

                /***************************************************************

                        Construct an encoder for the given UConverter.

                ***************************************************************/

                this (UConverter cvt)
                {
                        xcode = (new UConverter("utf32")).createTranscoder (cvt);
                }

                /***************************************************************

                        Construct an encoder of the given output 'type'.

                        The source-encoding is assumed to be utf32.

                ***************************************************************/

                this (char[] type)
                {
                        this (new UConverter(type));
                }

                /***************************************************************

                        Transcode the content of 'd' into buffer 'b'.
                        The conversion is requested to flush once the
                        remaining input is empty.

                ***************************************************************/

                void encode (IBuffer b, dchar[] d)
                {
                        // invoked by the buffer with its writable space;
                        // returns the number of bytes placed there
                        int write (void[] x)
                        {
                                UAdjust adj;

                                // NOTE(review): the result is taken to
                                // mean "output space exhausted, call
                                // again" -- confirm against ITranscoder
                                more = xcode.convert (d, x, adj, d.length == 0);

                                // drop the input consumed thus far
                                d = d[adj.input..length];
                                return adj.output;
                        }

                        super.encode (b, &write);
                }

                /***************************************************************

                        BufferEncoder handler: encode 'count' bytes of
                        dchar content at 'p' into the bound buffer. The
                        'type' argument is not consulted.

                ***************************************************************/

                protected void encoder (void* p, uint count, int type)
                {
                        encode (bound, (cast(dchar*) p)[0..count/dchar.sizeof]);
                }

                /***************************************************************

                        Return the type of this encoder

                ***************************************************************/

                ConverterType type ()
                {
                        return ConverterType.DChar;
                }

                /***************************************************************

                        Reset the underlying transcoder

                ***************************************************************/

                void reset ()
                {
                        xcode.reset();
                }
        }
}