Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

UMango.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file UMango.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032 
00033         @version        Initial version, October 2004      
00034         @author         Kris
00035 
00036 *******************************************************************************/
00037 
00038 module mango.icu.UMango;
00039 
00040 public import mango.icu.UConverter;
00041 
00042 /*******************************************************************************
00043 
00044         Include these classes when compiled with the Mango.io package.
00045         They represent the 'glue' to bind said package to the unicode 
00046         converters provided by ICU.
00047 
00048 *******************************************************************************/
00049 
00050 version (Mango)
00051 {
00052         private import mango.io.model.IReader;
00053         private import mango.io.model.IWriter;
00054 
00055         /***********************************************************************
00056 
00057                 Abstract base class for String decoders. These decoders
00058                 bind the ICU functionality to the Mango.io package, and
00059                 provide some utility functions such as input streaming.
00060 
00061                 These decoder classes will always attempt to fill their
00062                 destination (provided) output array, but may terminate
00063                 early if (a) a defined read 'limit' on the input stream 
00064                 has been reached or (b) a partial surrogate-pair would
00065                 be left at the output tail. Each decoder returns a count
00066                 of how many output elements were actually converted.
00067 
00068         ***********************************************************************/
00069 
00070         class StringDecoder : IReadable, IDecoder
00071         {
00072                 private UConverter      cvt;
00073                 private bool            done;
00074                 private IBuffer         bound;
00075                 private uint            limit = uint.max;
00076 
00077                 /***************************************************************
00078 
00079                         Decoders can be used to convert directly into a 
00080                         provided destination. The converter will try to 
00081                         fill the destination, up to the configured input 
00082                         'limit', and returns the number of elements thus
00083                         converted. This returned value will be less than
00084                         the destination capacity when either the 'limit'
00085                         was reached, or when a partial surrogate would  
00086                         be placed at the tail.
00087 
00088                 ***************************************************************/
00089 
00090                 abstract uint read (IBuffer b, wchar[] dst);
00091 
00092                 /***************************************************************
00093 
00094                         Signature for BufferDecoder handlers. These 
00095                         decoders are intended to be usable as the 
00096                         default handlers within the reader constructs. 
00097                         Use IReader.setDecoder() to set a decoder as 
00098                         the default handler.
00099 
00100                 ***************************************************************/
00101 
00102                 abstract uint decoder (void* p, uint capacity);
00103 
00104                 /***************************************************************
00105 
00106                         Return the type of this decoder
00107 
00108                 ***************************************************************/
00109 
00110                 abstract ConverterType type ();
00111 
00112                 /***************************************************************
00113 
00114                         Set the limit for this decoder. This will cause
00115                         the decoder to halt after reading the specified 
00116                         number of bytes from its input. The decoder may
00117                         also halt before that point if the destination
00118                         becomes full. Use method toGo() to monitor how 
00119                         many input bytes remain to be read.
00120 
00121                 ***************************************************************/
00122 
00123                 void setLimit (uint limit)
00124                 {
00125                         this.limit = limit;
00126                 }
00127 
00128                 /***************************************************************
00129 
00130                         Change the converter used for this decoder.
00131                         The provided converter must not be null.
00132                 ***************************************************************/
00133 
00134                 void setConverter (UConverter cvt)
00135                 in {
00136                    assert (cvt);
00137                    }
00138                 body
00139                 {
00140                         this.cvt = cvt;
00141                 }
00142 
00143                 /***************************************************************
00144 
00145                         Reset the converter and the input limit. The latter
00146                         defaults to being unlimited, causing the decoder to
00147                         read until the destination is full.
00148 
00149                 ***************************************************************/
00150 
00151                 void reset (uint limit = uint.max)
00152                 {
00153                         setLimit  (limit);
00154                         cvt.reset ();
00155                 }
00156 
00157                 /***************************************************************
00158 
00159                         Return the number of bytes yet to be read
00160                         (uint.max means no limit is in effect).
00161                 ***************************************************************/
00162 
00163                 protected uint toGo ()
00164                 {
00165                         return limit;
00166                 }
00167 
00168                 /***************************************************************
00169 
00170                         Placeholder for subclasses to do something useful
00171                         when applied to an IReader. See UString for an 
00172                         example of such usage.
00173 
00174                 ***************************************************************/
00175 
00176                 protected void read (IReader r)
00177                 {
00178                 }
00179 
00180                 /***************************************************************
00181 
00182                         Bind this StringDecoder to the specified IReader.
00183                         This is invoked by an IReader to install it as the 
00184                         default handler, and thus be used by all subsequent 
00185                         IReader.get() requests for the subclass type. 
00186                         
00187                         Note that the byte limit will be respected if 'limit' 
00188                         has been set, which can be useful when converting an 
00189                         unknown number of elements (a la HTTP).
00190 
00191                 ***************************************************************/
00192 
00193                 final BufferDecoder bind (IReader reader)
00194                 {
00195                         bound = reader.getBuffer ();
00196                         return &decoder;
00197                 }
00198 
00199                 /***************************************************************
00200 
00201                         Decode IBuffer input until the delegate indicates
00202                         it is finished. Typically, that occurs when either
00203                         the destination is full, or the input 'limit' has
00204                         been reached.
00205 
00206                 ***************************************************************/
00207 
00208                 private final void decode (IBuffer buffer, int delegate (void[]) dg)
00209                 {
00210                         done = false;
00211                         while (limit && !done)
00212                               {
00213                               buffer.get  (1, false);
00214                               buffer.read (dg);
00215                               }
00216                 }
00217         }
00218 
00219 
00220         /***********************************************************************
00221 
00222                 Decode a byte stream into UTF16 wchars. This decoder can:
00223 
00224                 - be used as the default wchar handler when attached to 
00225                   an IReader (see IReader.setDecoder).
00226                 
00227                 - be used directly to fill a provided destination array
00228                   with converted wchars.
00229 
00230                 - be used in either of the prior two cases with a 'limit'
00231                   placed upon the number of input bytes converted (in 
00232                   addition to the destination capacity limit). This can
00233                   be useful when the number of raw bytes is known, but 
00234                   the number of wchar elements is not, and can be handy
00235                   for streaming conversions.
00236 
00237         ***********************************************************************/
00238 
00239         class StringDecoder16 : StringDecoder
00240         {
00241                 /***************************************************************
00242 
00243                         Construct a decoder with the given UConverter, and
00244                         an optional 'limit' to the number of input bytes to
00245                         be converted.
00246 
00247                 ***************************************************************/
00248 
00249                 this (UConverter cvt, uint limit = uint.max)
00250                 {
00251                         this.cvt = cvt;
00252                         super.reset (limit);
00253                 }
00254 
00255                 /***************************************************************
00256 
00257                         Construct a decoder of the given specification, and
00258                         an optional 'limit' to the number of input bytes to
00259                         be converted.
00260 
00261                 ***************************************************************/
00262 
00263                 this (char[] type, uint limit = uint.max)
00264                 {
00265                         this (new UConverter (type), limit);
00266                 }
00267 
00268                 /***************************************************************
00269 
00270                         Return the type of this decoder
00271                         (always ConverterType.WChar).
00272                 ***************************************************************/
00273 
00274                 ConverterType type ()
00275                 {
00276                         return ConverterType.WChar;
00277                 }
00278 
00279                 /***************************************************************
00280 
00281                         Signature for BufferDecoder handlers. These 
00282                         decoders are intended to be usable as the 
00283                         default handlers within the reader constructs. 
00284                         Use IReader.setDecoder() to set a decoder as 
00285                         the default handler.
00286                         Both 'capacity' and the result are byte counts.
00287                 ***************************************************************/
00288 
00289                 protected uint decoder (void* p, uint capacity)
00290                 {       
00291                         // this ugly conversion/casting back and forth is
00292                         // a lot more efficient than the intrinsic array
00293                         // conversion generated via an array[] cast
00294                         return read (bound, (cast(wchar*) p)[0..capacity / wchar.sizeof]) * wchar.sizeof;
00295                 }
00296 
00297                 /***************************************************************
00298 
00299                         Decoders can be used to convert directly into a 
00300                         provided destination. The converter will try to 
00301                         fill the destination, up to the configured input 
00302                         'limit', and returns the number of elements thus
00303                         converted. This returned value will be less than
00304                         the destination capacity when either the 'limit'
00305                         was reached, or when a partial surrogate would  
00306                         have been placed at the tail.
00307 
00308                 ***************************************************************/
00309 
00310                 final uint read (IBuffer buffer, wchar[] dst)
00311                 {
00312                         uint produced;
00313 
00314                         int read (void[] x)
00315                         {
00316                                 UAdjust adj;
00317                                 uint    len = x.length;
00318 
00319                                 // clip the input to the remaining byte 'limit'
00320                                 if (len > limit)
00321                                     len = limit;
00322                                 
00323                                 // do the conversion; test for overflow.
00324                                 // There's an issue here with certain 
00325                                 // conversion types (e.g. utf7) where byte
00326                                 // combinations appear ambiguous. It is
00327                                 // possible that the converter will cache
00328                                 // such combinations until it determines 
00329                                 // the result from subsequent input data. 
00330                                 // However, if such a condition occurs at
00331                                 // the tail end of an input stream, the
00332                                 // conversion may stall whilst waiting on
00333                                 // more input. There does not appear to
00334                                 // be a means of identifying whether or
00335                                 // not content has been cached, so there 
00336                                 // is little one can do at this time ...
00337                                 // Note that this issue does not exist
00338                                 // when 'limit' is active
00339                                 done = cvt.decode (x[0..len], dst[produced..length], adj, len == 0);
00340 
00341                                 // adjust output. Note that we always clip
00342                                 // the bytes read to match the output size
00343                                 if ((produced += adj.output) >= dst.length)
00344                                      done = true;
00345 
00346                                 // are we limiting input? (uint.max == unlimited)
00347                                 if (limit != uint.max)
00348                                     limit -= adj.input;                                
00349 
00350                                 // say how much we consumed
00351                                 return adj.input;
00352                         }
00353 
00354                         decode (buffer, &read);
00355                         return produced;
00356                 }
00357         }
00358 
00359 
00360 
00361         /***********************************************************************
00362                 Abstract base class for encoders that write converted text to an IBuffer.
00363         ***********************************************************************/
00364 
00365         class StringEncoder : IEncoder
00366         {
00367                 private bool    more;
00368                 private IBuffer bound;
00369 
00370                 /***************************************************************
00371                         Reset this encoder; subclasses reset their converter.
00372                 ***************************************************************/
00373 
00374                 abstract void reset ();
00375 
00376                 /***************************************************************
00377                         Return the type of this encoder
00378                 ***************************************************************/
00379 
00380                 abstract ConverterType type ();
00381 
00382                 /***************************************************************
00383                         Handler returned by bind(); subclasses treat 'count' as bytes.
00384                 ***************************************************************/
00385 
00386                 abstract void encoder (void* p, uint count);
00387 
00388                 /***************************************************************
00389 
00390                         Bind this StringEncoder to the specified IWriter.
00391                         This is invoked by an IWriter to install it as the 
00392                         default handler, and thus be used by all subsequent 
00393                         IWriter.put() requests for the subclass type. 
00394                         
00395                 ***************************************************************/
00396 
00397                 final BufferEncoder bind (IWriter w)
00398                 {
00399                         bound = w.getBuffer ();
00400                         return &encoder;
00401                 }
00402 
00403                 /***************************************************************
00404                         Pass 'dg' to b.write() until 'more' is cleared, flushing if not writable.
00405                 ***************************************************************/
00406 
00407                 private final void encode (IBuffer b, int delegate (void[]) dg)
00408                 {
00409                         more = true;
00410                         while (more)
00411                               {
00412                               if (! b.writable)
00413                                     b.flush ();
00414                               b.write (dg);
00415                               }
00416                 }
00417         }
00418 
00419 
00420         /***********************************************************************
00421                 Encode char[] content into the encoding of a target UConverter.
00422         ***********************************************************************/
00423 
00424         class StringEncoder8 : StringEncoder
00425         {
00426                 private ITranscoder xcode;
00427 
00428                 /***************************************************************
00429 
00430                         Construct an encoder for the given UConverter, 
00431                         where the source-content encoding is specified
00432                         by 'source'.
00433 
00434                         The default source-encoding is assumed to be utf8.
00435 
00436                 ***************************************************************/
00437 
00438                 this (UConverter cvt, char[] source = "utf8")
00439                 {
00440                         xcode = (new UConverter(source)).createTranscoder (cvt);
00441                 }
00442 
00443                 /***************************************************************
00444 
00445                         Construct an encoder of the given output 'type',
00446                         where the source-content encoding is specified
00447                         by 'source'.
00448 
00449                         The default source-encoding is assumed to be utf8.
00450 
00451                 ***************************************************************/
00452 
00453                 this (char[] type, char[] source = "utf8")
00454                 {
00455                         this (new UConverter(type), source);
00456                 }
00457 
00458                 /***************************************************************
00459                         Convert 'c' via the transcoder, writing the output to buffer 'b'.
00460                 ***************************************************************/
00461 
00462                 void encode (IBuffer b, char[] c)
00463                 {
00464                         int write (void[] x)
00465                         {
00466                                 UAdjust adj;
00467 
00468                                 more = xcode.convert (c, x, adj, c.length == 0);
00469                                 c = c[adj.input..length];
00470                                 return adj.output;
00471                         }
00472 
00473                         super.encode (b, &write);
00474                 }
00475 
00476                 /***************************************************************
00477                         Handler for bind(): encode 'count' bytes at 'p' into the bound buffer.
00478                 ***************************************************************/
00479 
00480                 protected void encoder (void* p, uint count)
00481                 {
00482                         encode (bound, (cast(char*) p)[0..count/char.sizeof]);
00483                 }
00484 
00485                 /***************************************************************
00486                         Return the type of this encoder (ConverterType.Char).
00487                 ***************************************************************/
00488 
00489                 ConverterType type ()
00490                 {
00491                         return ConverterType.Char;
00492                 }
00493 
00494                 /***************************************************************
00495                         Reset the underlying transcoder.
00496                 ***************************************************************/
00497 
00498                 void reset ()
00499                 {
00500                         xcode.reset();
00501                 }
00502         }
00503 
00504 
00505         /***********************************************************************
00506                 Encode wchar[] (utf16) content using the provided UConverter.
00507         ***********************************************************************/
00508 
00509         class StringEncoder16 : StringEncoder
00510         {
00511                 private UConverter cvt;
00512 
00513                 /***************************************************************
00514                         Construct an encoder that uses the given UConverter.
00515                 ***************************************************************/
00516 
00517                 this (UConverter cvt)
00518                 {
00519                         this.cvt = cvt;
00520                 }
00521 
00522                 /***************************************************************
00523 
00524                         Construct an encoder of the given output 'type'.
00525 
00526                         The source-encoding is assumed to be utf16.
00527 
00528                 ***************************************************************/
00529 
00530                 this (char[] type)
00531                 {
00532                         this (new UConverter(type));
00533                 }
00534 
00535                 /***************************************************************
00536                         Encode 'w' via the converter, writing the output to buffer 'b'.
00537                 ***************************************************************/
00538 
00539                 void encode (IBuffer b, wchar[] w)
00540                 {
00541                         int write (void[] x)
00542                         {
00543                                 UAdjust adj;
00544 
00545                                 more = cvt.encode (w, x, adj, w.length == 0);
00546                                 w = w[adj.input..length];
00547                                 return adj.output;
00548                         }
00549 
00550                         super.encode (b, &write);       
00551                 }
00552 
00553                 /***************************************************************
00554                         Handler for bind(): encode 'count' bytes at 'p' into the bound buffer.
00555                 ***************************************************************/
00556 
00557                 protected void encoder (void* p, uint count)
00558                 {
00559                         encode (bound, (cast(wchar*) p)[0..count/wchar.sizeof]);
00560                 }
00561 
00562                 /***************************************************************
00563                         Return the type of this encoder (ConverterType.WChar).
00564                 ***************************************************************/
00565 
00566                 ConverterType type ()
00567                 {
00568                         return ConverterType.WChar;
00569                 }
00570 
00571                 /***************************************************************
00572                         Reset the underlying converter.
00573                 ***************************************************************/
00574 
00575                 void reset ()
00576                 {
00577                         cvt.reset();
00578                 }
00579         }
00580 
00581 
00582         /***********************************************************************
00583                 Encode dchar[] (utf32) content into the encoding of a target UConverter.
00584         ***********************************************************************/
00585 
00586         class StringEncoder32 : StringEncoder
00587         {
00588                 private ITranscoder xcode;
00589 
00590                 /***************************************************************
00591                         Construct an encoder transcoding from utf32 to the given UConverter.
00592                 ***************************************************************/
00593 
00594                 this (UConverter cvt)
00595                 {
00596                         xcode = (new UConverter("utf32")).createTranscoder (cvt);
00597                 }
00598 
00599                 /***************************************************************
00600 
00601                         Construct an encoder of the given output 'type'.
00602 
00603                         The source-encoding is assumed to be utf32.
00604 
00605                 ***************************************************************/
00606 
00607                 this (char[] type)
00608                 {
00609                         this (new UConverter(type));
00610                 }
00611 
00612                 /***************************************************************
00613                         Convert 'd' via the transcoder, writing the output to buffer 'b'.
00614                 ***************************************************************/
00615 
00616                 void encode (IBuffer b, dchar[] d)
00617                 {
00618                         int write (void[] x)
00619                         {
00620                                 UAdjust adj;
00621 
00622                                 more = xcode.convert (d, x, adj, d.length == 0);
00623                                 d = d[adj.input..length];
00624                                 return adj.output;
00625                         }
00626 
00627                         super.encode (b, &write);       
00628                 }
00629 
00630                 /***************************************************************
00631                         Handler for bind(): encode 'count' bytes at 'p' into the bound buffer.
00632                 ***************************************************************/
00633 
00634                 protected void encoder (void* p, uint count)
00635                 {
00636                         encode (bound, (cast(dchar*) p)[0..count/dchar.sizeof]);
00637                 }
00638 
00639                 /***************************************************************
00640                         Return the type of this encoder (ConverterType.DChar).
00641                 ***************************************************************/
00642 
00643                 ConverterType type ()
00644                 {
00645                         return ConverterType.DChar;
00646                 }
00647 
00648                 /***************************************************************
00649                         Reset the underlying transcoder.
00650                 ***************************************************************/
00651 
00652                 void reset ()
00653                 {
00654                         xcode.reset();
00655                 }
00656         }
00657 }

Generated on Tue Jan 25 21:18:24 2005 for Mango by doxygen 1.3.6