Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

UMango.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file UMango.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032 
00033         @version        Initial version, October 2004      
00034         @author         Kris
00035 
00036 *******************************************************************************/
00037 
00038 module mango.icu.UMango;
00039 
00040 public  import mango.icu.UConverter;
00041 
00042 private import mango.convert.Type;
00043 
00044 /*******************************************************************************
00045 
00046         Include these classes when compiled with the Mango.io package.
00047         They represent the 'glue' to bind said package to the unicode 
00048         converters provided by ICU.
00049 
00050 *******************************************************************************/
00051 
00052 version (Isolated){}
00053 else
00054 {
00055         private import mango.io.model.IReader;
00056         private import mango.io.model.IWriter;
00057 
00058         /***********************************************************************
00059 
00060                 Abstract base class for String decoders. These decoders
00061                 bind the ICU functionality to the Mango.io package, and
00062                 provide some utility functions such as input streaming.
00063 
00064                 These decoder classes will always attempt to fill their
00065                 destination (provided) output array, but may terminate
00066                 early if (a) a defined read 'limit' on the input stream 
00067                 has been reached or (b) a partial surrogate-pair would
00068                 be left at the output tail. Each decoder returns a count
00069                 of how many output elements were actually converted.
00070 
00071         ***********************************************************************/
00072 
00073         class StringDecoder : AbstractDecoder, IReadable
00074         {
00075                 private UConverter      cvt;                    // converter; reset via reset()
00076                 private bool            done;                   // set by the delegate to end decode()
00077                 private IBuffer         bound;                  // buffer installed via bind()
00078                 private uint            limit = uint.max;       // input bytes to go; uint.max == unlimited
00079 
00080                 /***************************************************************
00081 
00082                         Decoders can be used to convert directly into a 
00083                         provided destination. The converter will try to 
00084                         fill the destination, up to the configured input 
00085                         'limit', and returns the number of elements thus
00086                         converted. This returned value will be less than
00087                         the destination capacity when either the 'limit'
00088                         was reached, or when a partial surrogate would  
00089                         be placed at the tail.
00090 
00091                 ***************************************************************/
00092 
00093                 abstract uint read (IBuffer b, wchar[] dst);
00094 
00095                 /***************************************************************
00096 
00097                         Signature for BufferDecoder handlers. These 
00098                         decoders are intended to be usable as the 
00099                         default handlers within the reader constructs. 
00100                         Use IReader.setDecoder() to set a decoder as 
00101                         the default handler.
00102 
00103                 ***************************************************************/
00104 
00105                 abstract uint decoder (void* p, uint capacity, uint type);
00106 
00107                 /***************************************************************
00108 
00109                         Return the type of this decoder
00110 
00111                 ***************************************************************/
00112 
00113                 abstract uint type ();
00114 
00115                 /***************************************************************
00116 
00117                         Set the limit for this decoder. This will cause
00118                         the decoder to halt after reading the specified 
00119                         number of bytes from its input. The decoder may
00120                         also halt before that point if the destination
00121                         becomes full. Use method toGo() to monitor how 
00122                         many bytes are yet to be read.
00123 
00124                 ***************************************************************/
00125 
00126                 void setLimit (uint limit)
00127                 {
00128                         this.limit = limit;
00129                 }
00130 
00131                 /***************************************************************
00132 
00133                         Change the converter used for this decoder.
00134 
00135                 ***************************************************************/
00136 
00137                 void setConverter (UConverter cvt)
00138                 in {
00139                    assert (cvt);
00140                    }
00141                 body
00142                 {
00143                         this.cvt = cvt;
00144                 }
00145 
00146                 /***************************************************************
00147 
00148                         Reset the converter and the input limit. The latter
00149                         defaults to being unlimited, causing the decoder to
00150                         read until the destination is full.
00151 
00152                 ***************************************************************/
00153 
00154                 void reset (uint limit = uint.max)
00155                 {
00156                         setLimit  (limit);
00157                         cvt.reset ();
00158                 }
00159 
00160                 /***************************************************************
00161 
00162                         Return the number of bytes yet to be read
00163 
00164                 ***************************************************************/
00165 
00166                 protected uint toGo ()
00167                 {
00168                         return limit;
00169                 }
00170 
00171                 /***************************************************************
00172 
00173                         Placeholder for subclasses to do something useful
00174                         when applied to an IReader. See UString for an 
00175                         example of such usage.
00176 
00177                 ***************************************************************/
00178 
00179                 protected void read (IReader r)
00180                 {
00181                 }
00182 
00183                 /***************************************************************
00184 
00185                         Bind this StringDecoder to the specified IBuffer.
00186                         This is invoked by an IReader to install it as the 
00187                         default handler, and thus be used by all subsequent 
00188                         IReader.get() requests for the subclass type. 
00189                         
00190                         Note that the byte limit will be respected if 'limit' 
00191                         has been set, which can be useful when converting an 
00192                         unknown number of elements (a la HTTP).
00193 
00194                 ***************************************************************/
00195 
00196                 final void bind (IBuffer buffer)
00197                 {
00198                         bound = buffer;
00199                 }
00200 
00201                 /***************************************************************
00202 
00203                         Decode IBuffer input until the delegate indicates
00204                         it is finished. Typically, that occurs when either
00205                         the destination is full, or the input 'limit' has
00206                         been reached.
00207 
00208                 ***************************************************************/
00209 
00210                 private final void decode (IBuffer buffer, uint delegate (void[]) dg)
00211                 {
00212                         done = false;
00213                         while (limit && !done)
00214                               {
00215                               buffer.get  (1, false);
00216                               buffer.read (dg);
00217                               }
00218                 }
00219         }
00220 
00221 
00222         /***********************************************************************
00223 
00224                 Decode a byte stream into UTF16 wchars. This decoder can:
00225 
00226                 - be used as the default wchar handler when attached to 
00227                   an IReader (see IReader.setDecoder).
00228                 
00229                 - be used directly to fill a provided destination array
00230                   with converted wchars.
00231 
00232                 - be used in either of the prior two cases with a 'limit'
00233                   placed upon the number of input bytes converted (in 
00234                   addition to the destination capacity limit). This can
00235                   be useful when the number of raw bytes is known, but 
00236                   the number of wchar elements is not, and can be handy
00237                   for streaming conversions.
00238 
00239         ***********************************************************************/
00240 
00241         class StringDecoder16 : StringDecoder
00242         {
00243                 /***************************************************************
00244 
00245                         Construct a decoder with the given UConverter, and
00246                         an optional 'limit' to the number of input bytes to
00247                         be converted.
00248 
00249                 ***************************************************************/
00250 
00251                 this (UConverter cvt, uint limit = uint.max)
00252                 {
00253                         this.cvt = cvt;
00254                         super.reset (limit);
00255                 }
00256 
00257                 /***************************************************************
00258 
00259                         Construct a decoder of the given specification, and
00260                         an optional 'limit' to the number of input bytes to
00261                         be converted.
00262 
00263                 ***************************************************************/
00264 
00265                 this (char[] type, uint limit = uint.max)
00266                 {
00267                         this (new UConverter (type), limit);
00268                 }
00269 
00270                 /***************************************************************
00271 
00272                         Return the type of this decoder
00273 
00274                 ***************************************************************/
00275 
00276                 uint type ()
00277                 {
00278                         return Type.Utf16;
00279                 }
00280 
00281                 /***************************************************************
00282 
00283                         Handler used when installed as a default decoder
00284                         via IReader.setDecoder(). Here 'capacity' is a
00285                         byte count: it is divided by wchar.sizeof to size
00286                         the destination, and the wchar count returned by
00287                         read() is scaled back up by wchar.sizeof.
00288 
00289                 ***************************************************************/
00290 
00291                 protected uint decoder (void* p, uint capacity, uint type)
00292                 {       
00293                         // this ugly conversion/casting back and forth is
00294                         // a lot more efficient than the intrinsic array
00295                         // conversion generated via an array[] cast
00296                         return read (bound, (cast(wchar*) p)[0..capacity / wchar.sizeof]) * wchar.sizeof;
00297                 }
00298 
00299                 /***************************************************************
00300 
00301                         Decoders can be used to convert directly into a 
00302                         provided destination. The converter will try to 
00303                         fill the destination, up to the configured input 
00304                         'limit', and returns the number of elements thus
00305                         converted. This returned value will be less than
00306                         the destination capacity when either the 'limit'
00307                         was reached, or when a partial surrogate would  
00308                         have been placed at the tail.
00309 
00310                 ***************************************************************/
00311 
00312                 final uint read (IBuffer buffer, wchar[] dst)
00313                 {
00314                         uint produced;
00315 
00316                         uint read (void[] x)
00317                         {
00318                                 UAdjust adj;
00319                                 uint    len = x.length;
00320 
00321                                 // have we read enough from the source?
00322                                 if (len > limit)
00323                                     len = limit;
00324                                 
00325                                 // do the conversion; test for overflow.
00326                                 // There's an issue here with certain 
00327                                 // conversion types (e.g. utf7) where byte
00328                                 // combinations appear ambiguous. It is
00329                                 // possible that the converter will cache
00330                                 // such combinations until it determines 
00331                                 // the result from subsequent input data. 
00332                                 // However, if such a condition occurs at
00333                                 // the tail end of an input stream, the
00334                                 // conversion may stall whilst waiting on
00335                                 // more input. There does not appear to
00336                                 // be a means of identifying whether or
00337                                 // not content has been cached, so there 
00338                                 // is little one can do at this time ...
00339                                 // Note that this issue does not exist
00340                                 // when 'limit' is active
00341                                 done = cvt.decode (x[0..len], dst[produced..length], adj, len == 0);
00342 
00343                                 // adjust output. Note that we always clip
00344                                 // the bytes read to match the output size
00345                                 if ((produced += adj.output) >= dst.length)
00346                                      done = true;
00347 
00348                                 // are we limiting input? (uint.max == unlimited)
00349                                 if (limit != uint.max)
00350                                     limit -= adj.input;                                
00351 
00352                                 // say how much we consumed
00353                                 return adj.input;
00354                         }
00355 
00356                         decode (buffer, &read);
00357                         return produced;
00358                 }
00359         }
00360 
00361 
00362 
00363         /***********************************************************************
00364                 Abstract base class for the String encoders declared below.
00365         ***********************************************************************/
00366 
00367         class StringEncoder : AbstractEncoder
00368         {
00369                 private bool    more;           // loop flag for encode(); set by the delegate
00370                 private IBuffer bound;          // buffer installed via bind()
00371 
00372                 /***************************************************************
00373                         Reset this encoder; subclasses reset their converter.
00374                 ***************************************************************/
00375 
00376                 abstract void reset ();
00377 
00378                 /***************************************************************
00379                         Return the type of this encoder
00380                 ***************************************************************/
00381 
00382                 abstract uint type ();
00383 
00384                 /***************************************************************
00385                         Encode 'count' bytes located at 'p'; see the subclasses.
00386                 ***************************************************************/
00387 
00388                 abstract uint encoder (void* p, uint count, uint type);
00389 
00390                 /***************************************************************
00391 
00392                         Bind this StringEncoder to the specified IBuffer.
00393                         This is invoked by an IWriter to install it as the 
00394                         default handler, and thus be used by all subsequent 
00395                         IWriter.put() requests for the subclass type. 
00396                         
00397                 ***************************************************************/
00398 
00399                 void bind (IBuffer buffer)
00400                 {
00401                         bound = buffer;
00402                 }
00403 
00404                 /***************************************************************
00405                         Write via 'dg' until it clears 'more', making room as needed.
00406                 ***************************************************************/
00407 
00408                 private final void encode (IBuffer b, uint delegate (void[]) dg)
00409                 {
00410                         more = true;
00411                         b.write (dg);
00412 
00413                         while (more)
00414                               {
00415                               // this should be some 'realistic' number, but
00416                               // is needed to handle the case of a GrowBuffer
00417                               b.makeRoom (1024);
00418                               b.write (dg);
00419                               }
00420                 }
00421         }
00422 
00423 
00424         /***********************************************************************
00425                 Encode char[] content (utf8 by default) through an ITranscoder.
00426         ***********************************************************************/
00427 
00428         class StringEncoder8 : StringEncoder
00429         {
00430                 private ITranscoder xcode;      // created from the source converter in the ctor
00431 
00432                 /***************************************************************
00433 
00434                         Construct an encoder for the given UConverter, 
00435                         where the source-content encoding is specified
00436                         by 'source'.
00437 
00438                         The default source-encoding is assumed to be utf8.
00439 
00440                 ***************************************************************/
00441 
00442                 this (UConverter cvt, char[] source = "utf8")
00443                 {
00444                         xcode = (new UConverter(source)).createTranscoder (cvt);
00445                 }
00446 
00447                 /***************************************************************
00448 
00449                         Construct an encoder of the given output 'type',
00450                         where the source-content encoding is specified
00451                         by 'source'.
00452 
00453                         The default source-encoding is assumed to be utf8.
00454 
00455                 ***************************************************************/
00456 
00457                 this (char[] type, char[] source = "utf8")
00458                 {
00459                         this (new UConverter(type), source);
00460                 }
00461 
00462                 /***************************************************************
00463                         Transcode the content of 'c' into the provided buffer.
00464                 ***************************************************************/
00465 
00466                 void encode (IBuffer b, char[] c)
00467                 {
00468                         uint write (void[] x)
00469                         {
00470                                 UAdjust adj;
00471 
00472                                 more = xcode.convert (c, x, adj, c.length == 0);
00473                                 c = c[adj.input..length];       // drop the input consumed so far
00474                                 return adj.output;
00475                         }
00476 
00477                         super.encode (b, &write);
00478                 }
00479 
00480                 /***************************************************************
00481                         Encode 'count' bytes at 'p' into the bound buffer; returns 0.
00482                 ***************************************************************/
00483 
00484                 protected uint encoder (void* p, uint count, uint type)
00485                 {
00486                         encode (bound, (cast(char*) p)[0..count/char.sizeof]);
00487                         return 0;
00488                 }
00489 
00490                 /***************************************************************
00491                         Return the type of this encoder (Type.Utf8)
00492                 ***************************************************************/
00493 
00494                 uint type ()
00495                 {
00496                         return Type.Utf8;
00497                 }
00498 
00499                 /***************************************************************
00500                         Reset the underlying transcoder.
00501                 ***************************************************************/
00502 
00503                 void reset ()
00504                 {
00505                         xcode.reset();
00506                 }
00507         }
00508 
00509 
00510         /***********************************************************************
00511                 Encode wchar[] content through a UConverter.
00512         ***********************************************************************/
00513 
00514         class StringEncoder16 : StringEncoder
00515         {
00516                 private UConverter cvt;         // converter used by encode() and reset()
00517 
00518                 /***************************************************************
00519                         Construct an encoder which uses the given UConverter.
00520                 ***************************************************************/
00521 
00522                 this (UConverter cvt)
00523                 {
00524                         this.cvt = cvt;
00525                 }
00526 
00527                 /***************************************************************
00528 
00529                         Construct an encoder of the given output 'type'.
00530 
00531                         The source-encoding is assumed to be utf16.
00532 
00533                 ***************************************************************/
00534 
00535                 this (char[] type)
00536                 {
00537                         this (new UConverter(type));
00538                 }
00539 
00540                 /***************************************************************
00541                         Encode the content of 'w' into the provided buffer.
00542                 ***************************************************************/
00543 
00544                 void encode (IBuffer b, wchar[] w)
00545                 {
00546                         uint write (void[] x)
00547                         {
00548                                 UAdjust adj;
00549 
00550                                 more = cvt.encode (w, x, adj, w.length == 0);
00551                                 w = w[adj.input..length];       // drop the input consumed so far
00552                                 return adj.output;
00553                         }
00554 
00555                         super.encode (b, &write);       
00556                 }
00557 
00558                 /***************************************************************
00559                         Encode 'count' bytes at 'p' into the bound buffer; returns 0.
00560                 ***************************************************************/
00561 
00562                 protected uint encoder (void* p, uint count, uint type)
00563                 {
00564                         encode (bound, (cast(wchar*) p)[0..count/wchar.sizeof]);
00565                         return 0;
00566                 }
00567 
00568                 /***************************************************************
00569                         Return the type of this encoder (Type.Utf16)
00570                 ***************************************************************/
00571 
00572                 uint type ()
00573                 {
00574                         return Type.Utf16;
00575                 }
00576 
00577                 /***************************************************************
00578                         Reset the underlying converter.
00579                 ***************************************************************/
00580 
00581                 void reset ()
00582                 {
00583                         cvt.reset();
00584                 }
00585         }
00586 
00587 
00588         /***********************************************************************
00589                 Encode dchar[] content through a utf32-sourced ITranscoder.
00590         ***********************************************************************/
00591 
00592         class StringEncoder32 : StringEncoder
00593         {
00594                 private ITranscoder xcode;      // created from a "utf32" converter in the ctor
00595 
00596                 /***************************************************************
00597                         Construct an encoder: a "utf32" transcoder bound to 'cvt'.
00598                 ***************************************************************/
00599 
00600                 this (UConverter cvt)
00601                 {
00602                         xcode = (new UConverter("utf32")).createTranscoder (cvt);
00603                 }
00604 
00605                 /***************************************************************
00606 
00607                         Construct an encoder of the given output 'type'.
00608 
00609                         The source-encoding is assumed to be utf32.
00610 
00611                 ***************************************************************/
00612 
00613                 this (char[] type)
00614                 {
00615                         this (new UConverter(type));
00616                 }
00617 
00618                 /***************************************************************
00619                         Transcode the content of 'd' into the provided buffer.
00620                 ***************************************************************/
00621 
00622                 void encode (IBuffer b, dchar[] d)
00623                 {
00624                         uint write (void[] x)
00625                         {
00626                                 UAdjust adj;
00627 
00628                                 more = xcode.convert (d, x, adj, d.length == 0);
00629                                 d = d[adj.input..length];       // drop the input consumed so far
00630                                 return adj.output;
00631                         }
00632 
00633                         super.encode (b, &write);       
00634                 }
00635 
00636                 /***************************************************************
00637                         Encode 'count' bytes at 'p' into the bound buffer; returns 0.
00638                 ***************************************************************/
00639 
00640                 protected uint encoder (void* p, uint count, uint type)
00641                 {
00642                         encode (bound, (cast(dchar*) p)[0..count/dchar.sizeof]);
00643                         return 0;
00644                 }
00645 
00646                 /***************************************************************
00647                         Return the type of this encoder (Type.Utf32)
00648                 ***************************************************************/
00649 
00650                 uint type ()
00651                 {
00652                         return Type.Utf32;
00653                 }
00654 
00655                 /***************************************************************
00656                         Reset the underlying transcoder.
00657                 ***************************************************************/
00658 
00659                 void reset ()
00660                 {
00661                         xcode.reset();
00662                 }
00663         }
00664 }

Generated on Sat Dec 24 17:28:34 2005 for Mango by  doxygen 1.4.0