Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

UMango.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file UMango.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032 
00033         @version        Initial version, October 2004      
00034         @author         Kris
00035 
00036 *******************************************************************************/
00037 
00038 module mango.icu.UMango;
00039 
00040 public import mango.icu.UConverter;
00041 
00042 /*******************************************************************************
00043 
00044         Include these classes when compiled with the Mango.io package.
00045         They represent the 'glue' to bind said package to the unicode 
00046         converters provided by ICU.
00047 
00048 *******************************************************************************/
00049 
00050 version (Isolated){}
00051 else
00052 {
00053         private import mango.io.model.IReader;
00054         private import mango.io.model.IWriter;
00055 
00056         /***********************************************************************
00057 
00058                 Abstract base class for String decoders. These decoders
00059                 bind the ICU functionality to the Mango.io package, and
00060                 provide some utility functions such as input streaming.
00061 
00062                 These decoder classes will always attempt to fill their
00063                 destination (provided) output array, but may terminate
00064                 early if (a) a defined read 'limit' on the input stream 
00065                 has been reached or (b) a partial surrogate-pair would
00066                 be left at the output tail. Each decoder returns a count
00067                 of how many output elements were actually converted.
00068 
00069         ***********************************************************************/
00070 
00071         class StringDecoder : IReadable, IDecoder
00072         {
00073                 private UConverter      cvt;                    // converter doing the decoding
00074                 private bool            done;                   // set by the delegate to end decode()
00075                 private IBuffer         bound;                  // buffer of the IReader given to bind()
00076                 private uint            limit = uint.max;       // input bytes left; uint.max = unlimited
00077 
00078                 /***************************************************************
00079 
00080                         Decoders can be used to convert directly into a 
00081                         provided destination. The converter will try to 
00082                         fill the destination, up to the configured input 
00083                         'limit', and returns the number of elements thus
00084                         converted. This returned value will be less than
00085                         the destination capacity when either the 'limit'
00086                         was reached, or when a partial surrogate would  
00087                         be placed at the tail.
00088 
00089                 ***************************************************************/
00090 
00091                 abstract uint read (IBuffer b, wchar[] dst);
00092 
00093                 /***************************************************************
00094 
00095                         Signature for BufferDecoder handlers. These 
00096                         decoders are intended to be usable as the 
00097                         default handlers within the reader constructs. 
00098                         Use IReader.setDecoder() to set a decoder as 
00099                         the default handler. This is the delegate that
00100                         bind() hands back to the IReader.
00101                 ***************************************************************/
00102 
00103                 abstract uint decoder (void* p, uint capacity, uint type);
00104 
00105                 /***************************************************************
00106 
00107                         Return the type of this decoder
00108 
00109                 ***************************************************************/
00110 
00111                 abstract ConverterType type ();
00112 
00113                 /***************************************************************
00114 
00115                         Set the limit for this decoder. This will cause
00116                         the decoder to halt after reading the specified 
00117                         number of bytes from its input. The decoder may
00118                         also halt before that point if the destination
00119                         becomes full. Use method toGo() to see how many
00120                         bytes remain before the limit is reached.
00121 
00122                 ***************************************************************/
00123 
00124                 void setLimit (uint limit)
00125                 {
00126                         this.limit = limit;
00127                 }
00128 
00129                 /***************************************************************
00130 
00131                         Change the converter used for this decoder. The
00132                         converter must not be null (checked by contract).
00133                 ***************************************************************/
00134 
00135                 void setConverter (UConverter cvt)
00136                 in {
00137                    assert (cvt);
00138                    }
00139                 body
00140                 {
00141                         this.cvt = cvt;
00142                 }
00143 
00144                 /***************************************************************
00145 
00146                         Reset the converter and the input limit. The latter
00147                         defaults to being unlimited, causing the decoder to
00148                         read until the destination is full.
00149 
00150                 ***************************************************************/
00151 
00152                 void reset (uint limit = uint.max)
00153                 {
00154                         setLimit  (limit);
00155                         cvt.reset ();
00156                 }
00157 
00158                 /***************************************************************
00159 
00160                         Return the number of bytes yet to be read. This is
00161                         uint.max while no limit has been configured.
00162                 ***************************************************************/
00163 
00164                 protected uint toGo ()
00165                 {
00166                         return limit;
00167                 }
00168 
00169                 /***************************************************************
00170 
00171                         Placeholder for subclasses to do something useful
00172                         when applied to an IReader. See UString for an 
00173                         example of such usage. Does nothing by default.
00174 
00175                 ***************************************************************/
00176 
00177                 protected void read (IReader r)
00178                 {
00179                 }
00180 
00181                 /***************************************************************
00182 
00183                         Bind this StringDecoder to the specified IReader.
00184                         This is invoked by an IReader to install it as the 
00185                         default handler, and thus be used by all subsequent 
00186                         IReader.get() requests for the subclass type. 
00187                         
00188                         Note that the byte limit will be respected if 'limit' 
00189                         has been set, which can be useful when converting an 
00190                         unknown number of elements (a la HTTP).
00191 
00192                 ***************************************************************/
00193 
00194                 final BufferDecoder bind (IReader reader)
00195                 {
00196                         bound = reader.getBuffer ();    // remembered for later decoder() calls
00197                         return &decoder;
00198                 }
00199 
00200                 /***************************************************************
00201 
00202                         Decode IBuffer input until the delegate indicates
00203                         it is finished. Typically, that occurs when either
00204                         the destination is full, or the input 'limit' has
00205                         been reached. The delegate signals completion by
00206                         setting 'done'; the loop also ends once 'limit' is 0.
00207                 ***************************************************************/
00208 
00209                 private final void decode (IBuffer buffer, int delegate (void[]) dg)
00210                 {
00211                         done = false;
00212                         while (limit && !done)
00213                               {
00214                               buffer.get  (1, false);   // NOTE(review): presumably ensures input is present -- confirm
00215                               buffer.read (dg);
00216                               }
00217                 }
00218         }
00219 
00220 
00221         /***********************************************************************
00222 
00223                 Decode a byte stream into UTF16 wchars. This decoder can:
00224 
00225                 - be used as the default wchar handler when attached to 
00226                   an IReader (see IReader.setDecoder).
00227                 
00228                 - be used directly to fill a provided destination array
00229                   with converted wchars.
00230 
00231                 - be used in either of the prior two cases with a 'limit'
00232                   placed upon the number of input bytes converted (in 
00233                   addition to the destination capacity limit). This can
00234                   be useful when the number of raw bytes is known, but 
00235                   the number of wchar elements is not, and can be handy
00236                   for streaming conversions.
00237 
00238         ***********************************************************************/
00239 
00240         class StringDecoder16 : StringDecoder
00241         {
00242                 /***************************************************************
00243 
00244                         Construct a decoder with the given UConverter, and
00245                         an optional 'limit' to the number of input bytes to
00246                         be converted. The converter must not be null: it is
00247                         installed via setConverter(), which asserts as much.
00248                 ***************************************************************/
00249 
00250                 this (UConverter cvt, uint limit = uint.max)
00251                 {
00252                         setConverter (cvt);
00253                         super.reset (limit);
00254                 }
00255 
00256                 /***************************************************************
00257 
00258                         Construct a decoder of the given specification, and
00259                         an optional 'limit' to the number of input bytes to
00260                         be converted.
00261 
00262                 ***************************************************************/
00263 
00264                 this (char[] type, uint limit = uint.max)
00265                 {
00266                         this (new UConverter (type), limit);
00267                 }
00268 
00269                 /***************************************************************
00270 
00271                         Return the type of this decoder
00272 
00273                 ***************************************************************/
00274 
00275                 ConverterType type ()
00276                 {
00277                         return ConverterType.WChar;
00278                 }
00279 
00280                 /***************************************************************
00281 
00282                         BufferDecoder handler, as returned by bind(). Treats
00283                         'p' as room for 'capacity' bytes of wchar output, fills
00284                         it from the bound buffer via read(), and returns the
00285                         number of bytes produced. The 'type' argument is not
00286                         used here. Use IReader.setDecoder() to install it.
00287 
00288                 ***************************************************************/
00289 
00290                 protected uint decoder (void* p, uint capacity, uint type)
00291                 {
00292                         // this ugly conversion/casting back and forth is
00293                         // a lot more efficient than the intrinsic array
00294                         // conversion generated via an array[] cast
00295                         return read (bound, (cast(wchar*) p)[0..capacity / wchar.sizeof]) * wchar.sizeof;
00296                 }
00297 
00298                 /***************************************************************
00299 
00300                         Decoders can be used to convert directly into a 
00301                         provided destination. The converter will try to 
00302                         fill the destination, up to the configured input 
00303                         'limit', and returns the number of elements thus
00304                         converted. This returned value will be less than
00305                         the destination capacity when either the 'limit'
00306                         was reached, or when a partial surrogate would  
00307                         have been placed at the tail.
00308 
00309                 ***************************************************************/
00310 
00311                 final uint read (IBuffer buffer, wchar[] dst)
00312                 {
00313                         uint produced;          // wchars written into dst so far
00314                         // delegate given to the buffer; returns the bytes it consumed
00315                         int read (void[] x)
00316                         {
00317                                 UAdjust adj;
00318                                 uint    len = x.length;
00319 
00320                                 // have we read enough from the source?
00321                                 if (len > limit)
00322                                     len = limit;
00323                                 
00324                                 // do the conversion; test for overflow.
00325                                 // There's an issue here with certain 
00326                                 // conversion types (e.g. utf7) where byte
00327                                 // combinations appear ambiguous. It is
00328                                 // possible that the converter will cache
00329                                 // such combinations until it determines 
00330                                 // the result from subsequent input data. 
00331                                 // However, if such a condition occurs at
00332                                 // the tail end of an input stream, the
00333                                 // conversion may stall whilst waiting on
00334                                 // more input. There does not appear to
00335                                 // be a means of identifying whether or
00336                                 // not content has been cached, so there 
00337                                 // is little one can do at this time ...
00338                                 // Note that this issue does not exist
00339                                 // when 'limit' is active
00340                                 done = cvt.decode (x[0..len], dst[produced..length], adj, len == 0);
00341 
00342                                 // adjust output; stop once the destination
00343                                 // has been filled
00344                                 if ((produced += adj.output) >= dst.length)
00345                                      done = true;
00346 
00347                                 // are we limiting input? (uint.max means no)
00348                                 if (limit != uint.max)
00349                                     limit -= adj.input;
00350 
00351                                 // say how much we consumed
00352                                 return adj.input;
00353                         }
00354 
00355                         decode (buffer, &read);
00356                         return produced;
00357                 }
00358         }
00359 
00360 
00361 
00362         /***********************************************************************
00363                 Abstract base class for String encoders bound to an IWriter buffer
00364         ***********************************************************************/
00365 
00366         class StringEncoder : IEncoder
00367         {
00368                 private bool    more;           // true while another write pass is wanted
00369                 private IBuffer bound;          // buffer of the IWriter given to bind()
00370 
00371                 /***************************************************************
00372                         Reset the conversion state held by the subclass
00373                 ***************************************************************/
00374 
00375                 abstract void reset ();
00376 
00377                 /***************************************************************
00378                         Return the type of this encoder
00379                 ***************************************************************/
00380 
00381                 abstract ConverterType type ();
00382 
00383                 /***************************************************************
00384                         BufferEncoder handler; bind() returns this as a delegate
00385                 ***************************************************************/
00386 
00387                 abstract void encoder (void* p, uint count, int type);
00388 
00389                 /***************************************************************
00390 
00391                         Attach this StringEncoder to the given IWriter. An
00392                         IWriter calls this to install the encoder as its
00393                         default handler, so that subsequent IWriter.put()
00394                         requests for the subclass type are routed through it.
00395 
00396                 ***************************************************************/
00397 
00398                 final BufferEncoder bind (IWriter w)
00399                 {
00400                         bound = w.getBuffer ();
00401                         return &encoder;
00402                 }
00403 
00404                 /***************************************************************
00405                         Keep handing 'dg' to the buffer until 'more' is cleared
00406                 ***************************************************************/
00407 
00408                 private final void encode (IBuffer b, int delegate (void[]) dg)
00409                 {
00410                         more = true;
00411                         do
00412                            {
00413                            if (! b.writable)
00414                                  b.flush ();
00415                            b.write (dg);
00416                            } while (more);
00417                 }
00418         }
00419 
00420 
00421         /***********************************************************************
00422                 Encode char[] content (utf8 by default) via an ICU transcoder
00423         ***********************************************************************/
00424 
00425         class StringEncoder8 : StringEncoder
00426         {
00427                 private ITranscoder xcode;
00428 
00429                 /***************************************************************
00430 
00431                         Build an encoder that transcodes content held in
00432                         the 'source' encoding (utf8 unless told otherwise)
00433                         into the encoding of the supplied UConverter.
00434 
00435                 ***************************************************************/
00436 
00437                 this (UConverter cvt, char[] source = "utf8")
00438                 {
00439                         UConverter origin = new UConverter (source);
00440                         xcode = origin.createTranscoder (cvt);
00441                 }
00442 
00443                 /***************************************************************
00444 
00445                         Build an encoder whose output converter is created
00446                         from the name 'type'; 'source' defaults to utf8.
00447 
00448                 ***************************************************************/
00449 
00450                 this (char[] type, char[] source = "utf8")
00451                 {
00452                         this (new UConverter (type), source);
00453                 }
00454 
00455                 /***************************************************************
00456 
00457                         Transcode 'c' into buffer 'b'. The nested delegate
00458                         drops the part of 'c' consumed by each pass.
00459 
00460                 ***************************************************************/
00461 
00462                 void encode (IBuffer b, char[] c)
00463                 {
00464                         int fill (void[] room)
00465                         {
00466                                 UAdjust moved;
00467                                 bool    exhausted = (c.length == 0);
00468                                 more = xcode.convert (c, room, moved, exhausted);
00469                                 c = c[moved.input .. c.length];
00470                                 return moved.output;
00471                         }
00472 
00473                         super.encode (b, &fill);
00474                 }
00475 
00476                 /***************************************************************
00477                         BufferEncoder handler: encode 'count' bytes of char at 'p'
00478                 ***************************************************************/
00479 
00480                 protected void encoder (void* p, uint count, int type)
00481                 {
00482                         char* text = cast(char*) p;
00483                         encode (bound, text[0 .. count / char.sizeof]);
00484                 }
00485 
00486                 /***************************************************************
00487                         Return the type of this encoder
00488                 ***************************************************************/
00489 
00490                 ConverterType type ()
00491                 {
00492                         return ConverterType.Char;
00493                 }
00494 
00495                 /***************************************************************
00496                         Reset the underlying transcoder
00497                 ***************************************************************/
00498 
00499                 void reset ()
00500                 {
00501                         xcode.reset ();
00502                 }
00503         }
00504 
00505 
00506         /***********************************************************************
00507                 Encode wchar[] (utf16) content using an ICU UConverter
00508         ***********************************************************************/
00509 
00510         class StringEncoder16 : StringEncoder
00511         {
00512                 private UConverter cvt;
00513 
00514                 /***************************************************************
00515                         Build an encoder around an existing UConverter
00516                 ***************************************************************/
00517 
00518                 this (UConverter cvt)
00519                 {
00520                         this.cvt = cvt;
00521                 }
00522 
00523                 /***************************************************************
00524 
00525                         Build an encoder for the output encoding named by
00526                         'type'. Source content is assumed to be utf16.
00527 
00528                 ***************************************************************/
00529 
00530                 this (char[] type)
00531                 {
00532                         this (new UConverter (type));
00533                 }
00534 
00535                 /***************************************************************
00536                         Encode 'w' into buffer 'b', trimming 'w' after each pass
00537                 ***************************************************************/
00538 
00539                 void encode (IBuffer b, wchar[] w)
00540                 {
00541                         int fill (void[] room)
00542                         {
00543                                 UAdjust moved;
00544                                 bool    exhausted = (w.length == 0);
00545                                 more = cvt.encode (w, room, moved, exhausted);
00546                                 w = w[moved.input .. w.length];
00547                                 return moved.output;
00548                         }
00549 
00550                         super.encode (b, &fill);
00551                 }
00552 
00553                 /***************************************************************
00554                         BufferEncoder handler: encode 'count' bytes of wchar at 'p'
00555                 ***************************************************************/
00556 
00557                 protected void encoder (void* p, uint count, int type)
00558                 {
00559                         wchar* text = cast(wchar*) p;
00560                         encode (bound, text[0 .. count / wchar.sizeof]);
00561                 }
00562 
00563                 /***************************************************************
00564                         Return the type of this encoder
00565                 ***************************************************************/
00566 
00567                 ConverterType type ()
00568                 {
00569                         return ConverterType.WChar;
00570                 }
00571 
00572                 /***************************************************************
00573                         Reset the underlying converter
00574                 ***************************************************************/
00575 
00576                 void reset ()
00577                 {
00578                         cvt.reset ();
00579                 }
00580         }
00581 
00582 
00583         /***********************************************************************
00584                 Encode dchar[] (utf32) content via an ICU transcoder
00585         ***********************************************************************/
00586 
00587         class StringEncoder32 : StringEncoder
00588         {
00589                 private ITranscoder xcode;
00590 
00591                 /***************************************************************
00592                         Build an encoder transcoding utf32 to the given UConverter
00593                 ***************************************************************/
00594 
00595                 this (UConverter cvt)
00596                 {
00597                         xcode = (new UConverter ("utf32")).createTranscoder (cvt);
00598                 }
00599 
00600                 /***************************************************************
00601 
00602                         Build an encoder for the output 'type'; source is utf32.
00603 
00604                 ***************************************************************/
00605 
00606                 this (char[] type)
00607                 {
00608                         this (new UConverter (type));
00609                 }
00610 
00611                 /***************************************************************
00612                         Transcode 'd' into buffer 'b', trimming 'd' after each pass
00613                 ***************************************************************/
00614 
00615                 void encode (IBuffer b, dchar[] d)
00616                 {
00617                         int fill (void[] room)
00618                         {
00619                                 UAdjust moved;
00620                                 bool    exhausted = (d.length == 0);
00621                                 more = xcode.convert (d, room, moved, exhausted);
00622                                 // NOTE(review): assumes moved.input counts dchars, not bytes -- confirm
00623                                 d = d[moved.input .. d.length];
00624                                 return moved.output;
00625                         }
00626 
00627                         super.encode (b, &fill);
00628                 }
00629 
00630                 /***************************************************************
00631                         BufferEncoder handler: encode 'count' bytes of dchar at 'p'
00632                 ***************************************************************/
00633 
00634                 protected void encoder (void* p, uint count, int type)
00635                 {
00636                         dchar* text = cast(dchar*) p;
00637                         encode (bound, text[0 .. count / dchar.sizeof]);
00638                 }
00639 
00640                 /***************************************************************
00641                         Return the type of this encoder
00642                 ***************************************************************/
00643 
00644                 ConverterType type ()
00645                 {
00646                         return ConverterType.DChar;
00647                 }
00648 
00649                 /***************************************************************
00650                         Reset the underlying transcoder
00651                 ***************************************************************/
00652 
00653                 void reset ()
00654                 {
00655                         xcode.reset ();
00656                 }
00657         }
00658 }

Generated on Mon Nov 14 10:59:41 2005 for Mango by  doxygen 1.4.0