Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

UString.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file UString.d
00004         
00005         Copyright (C) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026 
00027                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00028 
00029 
00030         @version        Initial version, October 2004      
00031         @author         Kris
00032 
00033         Note that this package and documentation is built around the ICU 
00034         project (http://oss.software.ibm.com/icu/). Below is the license 
00035         statement as specified by that software:
00036 
00037 
00038                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00039 
00040 
00041         ICU License - ICU 1.8.1 and later
00042 
00043         COPYRIGHT AND PERMISSION NOTICE
00044 
00045         Copyright (c) 1995-2003 International Business Machines Corporation and 
00046         others.
00047 
00048         All rights reserved.
00049 
00050         Permission is hereby granted, free of charge, to any person obtaining a
00051         copy of this software and associated documentation files (the
00052         "Software"), to deal in the Software without restriction, including
00053         without limitation the rights to use, copy, modify, merge, publish,
00054         distribute, and/or sell copies of the Software, and to permit persons
00055         to whom the Software is furnished to do so, provided that the above
00056         copyright notice(s) and this permission notice appear in all copies of
00057         the Software and that both the above copyright notice(s) and this
00058         permission notice appear in supporting documentation.
00059 
00060         THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00061         OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00062         MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
00063         OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
00064         HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
00065         INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
00066         FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
00067         NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
00068         WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
00069 
00070         Except as contained in this notice, the name of a copyright holder
00071         shall not be used in advertising or otherwise to promote the sale, use
00072         or other dealings in this Software without prior written authorization
00073         of the copyright holder.
00074 
00075         ----------------------------------------------------------------------
00076 
00077         All trademarks and registered trademarks mentioned herein are the 
00078         property of their respective owners.
00079 
00080 *******************************************************************************/
00081 
00082 module mango.icu.UString;
00083 
00084 private import  mango.icu.ICU,
00085                 mango.icu.UChar,
00086                 mango.icu.ULocale;
00087 
00088 /*******************************************************************************
00089 
00090 *******************************************************************************/
00091 
// C runtime memmove(): copies 'bytes' bytes from src to dst and is safe for
// overlapping regions. Declared to match the C prototype, so that the byte
// count is passed correctly on targets where size_t is wider than uint.
private extern (C) void* memmove (void* dst, void* src, size_t bytes);
00093 
/*******************************************************************************

        When this module is built together with the mango.io package, bind
        to its IReadable and IWritable interfaces via the reader and writer
        models. Otherwise declare empty stand-ins, so that the classes in
        this module can still name those interfaces.

*******************************************************************************/

version (Mango)
{
        private import  mango.io.model.IWriter,
                        mango.io.model.IReader;
}
else
{
        interface IWritable {}
        interface IReadable {}
}
00111 
00112 
00113 /*******************************************************************************
00114 
00115         UString is a string class that stores Unicode characters directly 
00116         and provides similar functionality as the Java String class.
00117 
00118         In ICU, a Unicode string consists of 16-bit Unicode code units. 
00119         A Unicode character may be stored with either one code unit — 
00120         which is the most common case — or with a matched pair of 
00121         special code units ("surrogates"). The data type for code units 
00122         is UChar.
00123 
00124         For single-character handling, a Unicode character code point is 
00125         a value in the range 0..0x10ffff. ICU uses the UChar32 type for 
00126         code points.
00127 
00128         Indexes and offsets into and lengths of strings always count code 
00129         units, not code points. This is the same as with multi-byte char* 
00130         strings in traditional string handling. Operations on partial 
00131         strings typically do not test for code point boundaries. If necessary, 
00132         the user needs to take care of such boundaries by testing for the code 
00133         unit values or by using functions like getChar32Start() 
00134         and getChar32Limit()
00135 
00136         UString methods are more lenient with regard to input parameter values 
00137         than other ICU APIs. In particular:
00138 
00139         - If indexes are out of bounds for a UString object (<0 or > length) 
00140           then they are "pinned" to the nearest boundary.
00141 
00142         - If primitive string pointer values (e.g., const wchar* or char*) for 
00143           input strings are null, then those input string parameters are treated 
00144           as if they pointed to an empty string. However, this is not the case 
00145           for char* parameters for charset names or other IDs.
00146         
00147 *******************************************************************************/
00148 
00149 class UString : UText
00150 {
00151         /***********************************************************************
00152         
00153                 Create an empty UString with the specified available space
00154 
00155         ***********************************************************************/
00156 
00157         this (uint space = 0)
00158         {
00159                 content.length = space;
00160                 mutable = true;
00161         }
00162 
00163         /***********************************************************************
00164         
00165                 Create a UString upon the provided content. If said content
00166                 is immutable (read-only) then you might consider setting the
00167                 'mutable' parameter to false. Doing so will avoid allocating
00168                 heap-space for the content until it is modified via one of
00169                 these methods.
00170 
00171         ***********************************************************************/
00172 
00173         this (wchar[] content, bool mutable = true)
00174         {
00175                 setTo (content, mutable);
00176         }
00177 
00178         /***********************************************************************
00179         
00180                 Create a UString via the content of a UText. Note that the
00181                 default is to assume the content is immutable (read-only).
00182                 
00183         ***********************************************************************/
00184         
00185         this (UText other, bool mutable = false)
00186         {
00187                 this (other.get, mutable);
00188         }
00189 
00190         /***********************************************************************
00191         
00192                 Create a UString via the content of a UString. If said content
00193                 is immutable (read-only) then you might consider setting the
00194                 'mutable' parameter to false. Doing so will avoid allocating
00195                 heap-space for the content until it is modified via one of
00196                 these methods.
00197 
00198         ***********************************************************************/
00199         
00200         this (UString other, bool mutable = true)
00201         {
00202                 this (other.get, mutable);
00203         }
00204 
00205         /***********************************************************************
00206         
00207                 Support for reading content via the IO system
00208 
00209         ***********************************************************************/
00210 
00211         version (Mango)
00212         {
00213                 void read (IReader r)
00214                 {
00215                         super.read (r);
00216 
00217                         // we're mutable, so ensure we don't mess with the
00218                         // IO buffers. Interestingly, changing the length 
00219                         // of a D array can account for slice assignments 
00220                         // (it checks the pointer to see if it's a starting
00221                         //  point in the pool). Unfortunately, that doesn't
00222                         // catch the case where a slice starts at offset 0,
00223                         // which is where IO buffer slices may come from. To
00224                         // be safe, we dup the content here. Note that this
00225                         // is not the case for UText, since it is read-only.
00226                         if (r.getAllocator.isReadOnly (content))
00227                             mutable = false;
00228                 }
00229         }
00230 
00231         /***********************************************************************
00232                 
00233                 Append text to this UString
00234 
00235         ***********************************************************************/
00236 
00237         UString append (UText other)
00238         {
00239                 return append (other.get);
00240         }
00241 
00242         /***********************************************************************
00243         
00244                 Append partial text to this UString
00245 
00246         ***********************************************************************/
00247 
00248         UString append (UText other, uint start, uint len=uint.max)
00249         {
00250                 other.pinIndices (start, len);
00251                 return append (other.content [start..start+len]);
00252         }
00253 
00254         /***********************************************************************
00255         
00256                 Append a single character to this UString
00257 
00258         ***********************************************************************/
00259 
00260         UString append (wchar chr)
00261         {
00262                 return append (&chr, 1);
00263         }
00264 
00265         /***********************************************************************
00266         
00267                 Append text to this UString
00268 
00269         ***********************************************************************/
00270 
00271         UString append (wchar[] chars)
00272         {
00273                 return append (chars, chars.length);
00274         }
00275 
00276         /***********************************************************************
00277                 
00278                 Set a section of this UString to the specified character
00279 
00280         ***********************************************************************/
00281 
00282         UString setTo (wchar chr, uint start=0, uint len=uint.max)
00283         {
00284                 pinIndices (start, len);
00285                 if (! mutable)
00286                       realloc ();
00287                 content [start..start+len] = chr;
00288                 return this;
00289         }
00290 
00291         /***********************************************************************
00292    
00293                 Set the content to the provided array. Parameter 'mutable'
00294                 specifies whether the given array is likely to change. If 
00295                 not, the array is aliased until such time this UString is
00296                 altered.
00297                      
00298         ***********************************************************************/
00299 
00300         UString setTo (wchar[] chars, bool mutable = true)
00301         {
00302                 len = chars.length;
00303                 if ((this.mutable = mutable) == true)
00304                      content = chars.dup;
00305                 else
00306                    content = chars;
00307                 return this;
00308         }
00309 
00310         /***********************************************************************
00311         
00312                 Replace the content of this UString. If the new content
00313                 is immutable (read-only) then you might consider setting the
00314                 'mutable' parameter to false. Doing so will avoid allocating
00315                 heap-space for the content until it is modified via one of
00316                 these methods.
00317 
00318         ***********************************************************************/
00319 
00320         UString setTo (UText other, bool mutable = true)
00321         {
00322                 return setTo (other.get, mutable);
00323         }
00324 
00325         /***********************************************************************
00326         
00327                 Replace the content of this UString. If the new content
00328                 is immutable (read-only) then you might consider setting the
00329                 'mutable' parameter to false. Doing so will avoid allocating
00330                 heap-space for the content until it is modified via one of
00331                 these methods.
00332 
00333         ***********************************************************************/
00334 
00335         UString setTo (UText other, uint start, uint len, bool mutable = true)
00336         {
00337                 other.pinIndices (start, len);
00338                 return setTo (other.content [start..start+len], mutable);
00339         }
00340 
00341         /***********************************************************************
00342         
00343                 Replace the character at the specified location.
00344 
00345         ***********************************************************************/
00346 
00347         UString setCharAt (uint index, wchar chr)
00348         {
00349                 pinIndex (index);
00350                 if (! mutable)
00351                       realloc ();
00352                 content [index] = chr;
00353                 return this;
00354         }
00355 
00356         /***********************************************************************
00357         
00358                 Remove leading and trailing whitespace from this UString
00359 
00360         ***********************************************************************/
00361 
00362         UString trim ()
00363         {
00364                 wchar   c;
00365                 uint    i = len;
00366 
00367                 // cut off trailing white space
00368                 while (i && ((c = charAt(i-1)) == 0x20 || UChar.isWhiteSpace (c)))
00369                        --i;
00370                 len = i;
00371 
00372                 // now remove leading whitespace
00373                 for (i=0; i < len && ((c = charAt(i)) == 0x20 || UChar.isWhiteSpace (c)); ++i) {}
00374                 return remove (0, i);
00375         }
00376 
00377         /***********************************************************************
00378         
00379                 Remove a piece of this UString.
00380 
00381         ***********************************************************************/
00382 
00383         UString remove (uint start, uint length=uint.max)
00384         {
00385                 pinIndices (start, length);
00386                 if (length)
00387                     if (start >= len)
00388                         truncate (start);
00389                     else
00390                        {
00391                        if (! mutable)
00392                              realloc ();
00393 
00394                        uint i = start + length;
00395                        memmove (cast(wchar*)content+start, cast(wchar*)content+i, (len-i) * wchar.sizeof);
00396                        len -= length;
00397                        }
00398                 return this;
00399         }
00400 
00401         /***********************************************************************
00402         
00403                 Truncate the length of this UString.
00404 
00405         ***********************************************************************/
00406 
00407         UString truncate (uint length=0)
00408         {
00409                 if (length <= len)
00410                     len = length;
00411                 return this;
00412         }
00413 
00414         /***********************************************************************
00415         
00416                 Insert leading spaces in this UString
00417 
00418         ***********************************************************************/
00419 
00420         UString padLeading (uint length, wchar padChar = 0x0020)
00421         {
00422                 expand  (length);
00423                 memmove (cast(wchar*) content + length, content, len * wchar.sizeof);
00424                 len += length;
00425                 return setTo (padChar, 0, length);
00426         }
00427 
00428         /***********************************************************************
00429         
00430                 Append some trailing spaces to this UString.
00431 
00432         ***********************************************************************/
00433 
00434         UString padTrailing (uint length, wchar padChar = 0x0020)
00435         {
00436                 expand (length);
00437                 len += length;
00438                 return setTo  (padChar, len-length, length);
00439         }
00440 
00441         /***********************************************************************
00442         
00443                 Check for available space within the buffer, and expand 
00444                 as necessary.
00445 
00446         ***********************************************************************/
00447 
00448         package final void expand (uint count)
00449         {
00450                 if ((len + count) > content.length)
00451                      realloc (count);
00452         }
00453 
00454         /***********************************************************************
00455         
00456                 Allocate memory dud to a change in the content. We handle 
00457                 the distinction between mutable and immutable here.
00458 
00459         ***********************************************************************/
00460 
00461         private final void realloc (uint count = 0)
00462         {
00463                 uint size = (content.length + count + 63) & ~63;
00464                 
00465                 if (mutable)
00466                     content.length = size;
00467                 else
00468                    {
00469                    mutable = true;
00470                    wchar[] x = content;
00471                    content = new wchar [size];
00472                    if (len)
00473                        content[0..len] = x;
00474                    }
00475         }
00476 
00477         /***********************************************************************
00478         
00479                 Internal method to support UString appending
00480 
00481         ***********************************************************************/
00482 
00483         private final UString append (wchar* chars, uint count)
00484         {
00485                 expand (count);
00486                 content[len..len+count] = chars[0..count];
00487                 len += count;
00488                 return this;
00489         }
00490 
00491         /***********************************************************************
00492         
00493                 Internal method to support formatting into this UString. 
00494                 This is used by many of the ICU wrappers to append content
00495                 into a UString.
00496 
00497         ***********************************************************************/
00498 
00499         typedef uint delegate (wchar* dst, uint len, inout Error e) Formatter;
00500 
00501         package final void format (Formatter format, char[] msg)
00502         {
00503                 Error   e;
00504                 uint    length;
00505 
00506                 while (true)
00507                       {
00508                       e = e.OK;
00509                       length = format (cast(wchar*)content + len, content.length - len, e);
00510                       if (e == e.BufferOverflow)
00511                           expand (length);
00512                       else
00513                          break;
00514                       } 
00515 
00516                 if (isError (e))
00517                     exception (msg);
00518 
00519                 len += length;
00520         }
00521 }
00522 
00523 
00524 /*******************************************************************************
00525 
00526         Immutable (read-only) text -- use UString for mutable strings.
00527 
00528 *******************************************************************************/
00529 
00530 class UText : ICU, IReadable, IWritable
00531 {
00532         // the core of the UText and UString attributes. The name 'len'
00533         // is used rather than the more obvious 'length' since there is
00534         // a collision with the silly array[length] syntactic sugar ...
00535         package uint    len;
00536         package wchar[] content;
00537 
00538         // this should probably be in UString only, but there seems to 
00539         // be a compiler bug where it doesn't get initialised correctly,
00540         // and it's perhaps useful to have here for when a UString is
00541         // passed as a UText argument.
00542         private bool    mutable;
00543 
00544         // toFolded() argument
00545         public enum     CaseOption 
00546                         {
00547                         Default  = 0, 
00548                         SpecialI = 1
00549                         }
00550 
00551         /***********************************************************************
00552         
00553                 Hidden constructor
00554 
00555         ***********************************************************************/
00556 
00557         private this ()
00558         {
00559         }
00560 
00561         /***********************************************************************
00562         
00563                 Construct read-only wrapper around the given content
00564 
00565         ***********************************************************************/
00566 
00567         this (wchar[] content)
00568         {
00569                 this.content = content;
00570                 this.len = content.length;
00571         }
00572 
00573         /***********************************************************************
00574         
00575                 Support for reading and writing via the Mango IO subsystem
00576 
00577         ***********************************************************************/
00578 
00579         version (Mango)
00580         {
00581                 void read (IReader r)
00582                 {
00583                         r.get (content);
00584                         len = content.length;
00585                 }
00586 
00587                 void write (IWriter w)
00588                 {
00589                         w.putw (get);
00590                 }
00591         }
00592 
00593         /***********************************************************************
00594         
00595                 Return the valid content from this UText
00596 
00597         ***********************************************************************/
00598 
00599         final package wchar[] get ()
00600         {
00601                 return content [0..len];
00602         }
00603 
00604         /***********************************************************************
00605         
00606                 Is this UText equal to another?
00607 
00608         ***********************************************************************/
00609 
00610         final override int opEquals (Object o)
00611         {
00612                 UText other = cast(UText) o;
00613 
00614                 if (other is null || other is this)
00615                     return 0;
00616 
00617                 return typeid(wchar[]).equals (&content[0..len], &other.content[0..other.len]);
00618         }
00619 
00620         /***********************************************************************
00621         
00622                 Compare this UText to another.
00623 
00624         ***********************************************************************/
00625 
00626         final override int opCmp (Object o)
00627         {
00628                 UText other = cast(UText) o;
00629 
00630                 if (other is null || other is this)
00631                     return 1;
00632 
00633                 return typeid(wchar[]).compare (&content[0..len], &other.content[0..other.len]);
00634         }
00635 
00636         /***********************************************************************
00637         
00638                 Hash this UText
00639 
00640         ***********************************************************************/
00641 
00642         final override uint toHash ()
00643         {
00644                 return typeid(wchar[]).getHash (&content[0..len]);
00645         }
00646 
00647         /***********************************************************************
00648         
00649                 Clone this UText into a UString
00650 
00651         ***********************************************************************/
00652 
00653         final UString copy ()
00654         {
00655                 return new UString (content);
00656         }
00657 
00658         /***********************************************************************
00659         
00660                 Clone a section of this UText into a UString
00661 
00662         ***********************************************************************/
00663 
00664         final UString extract (uint start, uint len=uint.max)
00665         {
00666                 pinIndices (start, len);
00667                 return new UString (content[start..start+len]);
00668         }
00669 
00670         /***********************************************************************
00671         
00672                 Count unicode code points in the length UChar code units of 
00673                 the string. A code point may occupy either one or two UChar 
00674                 code units. Counting code points involves reading all code 
00675                 units.
00676 
00677         ***********************************************************************/
00678 
00679         final uint codePoints (uint start=0, uint length=uint.max)
00680         {
00681                 pinIndices (start, length);
00682                 return u_countChar32 (cast(wchar*) content+start, length);
00683         }
00684 
00685         /***********************************************************************
00686         
00687                 Return an indication whether or not there are surrogate pairs
00688                 within the string.
00689 
00690         ***********************************************************************/
00691 
00692         final bool hasSurrogates (uint start=0, uint length=uint.max)
00693         {
00694                 pinIndices (start, length);
00695                 return codePoints (start, length) != length;
00696         }
00697 
00698         /***********************************************************************
00699         
00700                 Return the character at the specified position.
00701 
00702         ***********************************************************************/
00703 
00704         final wchar charAt (uint index)
00705         {       
00706                 return content [index];
00707         }
00708 
00709         /***********************************************************************
00710         
00711                 Return the length of the valid content
00712 
00713         ***********************************************************************/
00714 
00715         final uint length ()
00716         {
00717                 return len;
00718         }
00719 
00720         /***********************************************************************
00721         
00722                 The comparison can be done in code unit order or in code 
00723                 point order. They differ only in UTF-16 when comparing 
00724                 supplementary code points (U+10000..U+10ffff) to BMP code 
00725                 points near the end of the BMP (i.e., U+e000..U+ffff). 
00726 
00727                 In code unit order, high BMP code points sort after 
00728                 supplementary code points because they are stored as 
00729                 pairs of surrogates which are at U+d800..U+dfff.
00730 
00731         ***********************************************************************/
00732 
00733         final uint compare (UString other, bool codePointOrder=false)
00734         {
00735                 return compare (other.get, codePointOrder); 
00736         }
00737 
00738         /***********************************************************************
00739
00740                 Compare against a wchar[] string. The result is negative,
00741                 zero or positive as this string sorts before, equal to or
00742                 after 'other'; it is signed so that "before" is not turned
00743                 into a large positive value.
00744
00745                 Ordering is by code unit unless codePointOrder is set. They
00746                 differ only for supplementary code points vs. U+e000..U+ffff:
00747                 in code unit order the latter sort last, since supplementary
00748                 code points are stored as surrogates at U+d800..U+dfff.
00749         ***********************************************************************/
00750
00751         final int compare (wchar[] other, bool codePointOrder=false)
00752         {
00753                 return u_strCompare (content, len, other, other.length, codePointOrder);
00754         }
00755 
00756         /***********************************************************************
00757
00758                 Case-folded comparison against another UString, using only
00759                 the valid content of 'other' (other.get) rather than its raw
00760                 buffer. The result is negative, zero or positive, so it is
00761                 returned as a signed value.
00762
00763                 The comparison can be done in UTF-16 code unit order or in
00764                 code point order. They differ only for supplementary code
00765                 points vs. U+e000..U+ffff: in code unit order the latter sort
00766                 last, as supplementary code points are surrogate pairs.
00767         ***********************************************************************/
00768
00769         final int compareFolded (UString other, CaseOption option = CaseOption.Default)
00770         {
00771                 return compareFolded (other.get, option);
00772         }
00773 
00774         /***********************************************************************
00775
00776                 Case-folded comparison of the valid content against a
00777                 wchar[] string. The result is negative, zero or positive,
00778                 so it is returned as a signed value rather than a uint.
00779
00780                 The comparison can be done in UTF-16 code unit order or in
00781                 code point order. They differ only for supplementary code
00782                 points vs. U+e000..U+ffff: in code unit order the latter sort
00783                 last, as supplementary code points are surrogate pairs.
00784
00785         ***********************************************************************/
00786
00787         final int compareFolded (wchar[] other, CaseOption option = CaseOption.Default)
00788         {
00789                 return compareFolded (get, other, option);      // three-argument private helper
00790         }
00791 
00792         /***********************************************************************
00793         
00794                 Does this UText start with specified string?
00795 
00796         ***********************************************************************/
00797 
00798         final bool startsWith (UText other)
00799         {
00800                 return this.startsWith (other.get);     // defer to the wchar[] overload
00801         }
00802 
00803         /***********************************************************************
00804         
00805                 Does this UText start with specified string?
00806 
00807         ***********************************************************************/
00808 
00809         final bool startsWith (wchar[] chars)
00810         {
00811                 // longer than the valid content can never match; the match uses compareFolded
00812                 uint n = chars.length;
00813                 return len >= n && compareFolded (content[0..n], chars) == 0;
00814         }
00815 
00816         /***********************************************************************
00817         
00818                 Does this UText end with specified string?
00819 
00820         ***********************************************************************/
00821 
00822         final bool endsWith (UText other)
00823         {
00824                 return this.endsWith (other.get);       // defer to the wchar[] overload
00825         }
00826 
00827         /***********************************************************************
00828         
00829                 Does this UText end with specified string?
00830 
00831         ***********************************************************************/
00832 
00833         final bool endsWith (wchar[] chars)
00834         {
00835                 // compare the trailing chars.length units, provided there are that many
00836                 uint n = chars.length;
00837                 return len >= n && compareFolded (content[len-n..len], chars) == 0;
00838         }
00839 
00840         /***********************************************************************
00841         
00842                 Find the first occurrence of a BMP code point in a string.
00843                 A surrogate code point is found only if its match in the 
00844                 text is not part of a surrogate pair.
00845 
00846         ***********************************************************************/
00847 
00848         final uint indexOf (wchar c, uint start=0)
00849         {
00850                 pinIndex (start);       // clamp so that len - start cannot underflow
00851                 wchar* base = cast(wchar*) content;
00852                 wchar* hit  = u_memchr (base + start, c, len - start);
00853                 // a null result means no match; -1 converts to uint.max
00854                 return hit ? hit - base : -1;
00855         }
00856 
00857         /***********************************************************************
00858         
00859                 Find the first occurrence of a substring in a string. 
00860 
00861                 The substring is found at code point boundaries. That means 
00862                 that if the substring begins with a trail surrogate or ends 
00863                 with a lead surrogate, then it is found only if these 
00864                 surrogates stand alone in the text. Otherwise, the substring 
00865                 edge units would be matched against halves of surrogate pairs.
00866 
00867         ***********************************************************************/
00868 
00869         final uint indexOf (UText other, uint start=0)
00870         {
00871                 return this.indexOf (other.get, start); // defer to the wchar[] overload
00872         }
00873 
00874         /***********************************************************************
00875         
00876                 Find the first occurrence of a substring in a string. 
00877 
00878                 The substring is found at code point boundaries. That means 
00879                 that if the substring begins with a trail surrogate or ends 
00880                 with a lead surrogate, then it is found only if these 
00881                 surrogates stand alone in the text. Otherwise, the substring 
00882                 edge units would be matched against halves of surrogate pairs.
00883 
00884         ***********************************************************************/
00885 
00886         final uint indexOf (wchar[] chars, uint start=0)
00887         {
00888                 pinIndex (start);       // clamp so that len - start cannot underflow
00889                 wchar* base = cast(wchar*) content;
00890                 wchar* hit  = u_strFindFirst (base + start, len - start, chars, chars.length);
00891                 // a null result means no match; -1 converts to uint.max
00892                 return hit ? hit - base : -1;
00893         }
00894 
00895         /***********************************************************************
00896         
00897                 Find the last occurrence of a BMP code point in a string.
00898                 A surrogate code point is found only if its match in the 
00899                 text is not part of a surrogate pair.
00900 
00901         ***********************************************************************/
00902 
00903         final uint lastIndexOf (wchar c, uint start=uint.max)
00904         {
00905                 pinIndex (start);       // anything beyond len, such as the default, becomes len
00906                 wchar* hit = u_memrchr (content, c, start);
00907                 if (! hit)
00908                     return -1;          // no match: -1 converts to uint.max
00909                 return hit - cast(wchar*) content;
00910         }
00911 
00912         /***********************************************************************
00913         
00914                 Find the last occurrence of a substring in a string. The
00915                 substring is found at code point boundaries, so its edge
00916                 units are not matched against halves of surrogate pairs.
00917 
00918         ***********************************************************************/
00919 
00920         final uint lastIndexOf (UText other, uint start=uint.max)
00921         {
00922                 return this.lastIndexOf (other.get, start);     // defer to the wchar[] overload
00923         }
00924 
00925         /***********************************************************************
00926         
00927                 Find the last occurrence of a substring in a string. 
00928 
00929                 The substring is found at code point boundaries. That means 
00930                 that if the substring begins with a trail surrogate or ends 
00931                 with a lead surrogate, then it is found only if these 
00932                 surrogates stand alone in the text. Otherwise, the substring 
00933                 edge units would be matched against halves of surrogate pairs.
00934 
00935         ***********************************************************************/
00936 
00937         final uint lastIndexOf (wchar[] chars, uint start=uint.max)
00938         {
00939                 pinIndex (start);       // anything beyond len, such as the default, becomes len
00940                 wchar* hit = u_strFindLast (content, start, chars, chars.length);
00941                 if (! hit)
00942                     return -1;          // no match: -1 converts to uint.max
00943                 return hit - cast(wchar*) content;
00944         }
00945 
00946         /***********************************************************************
00947 
00948                 Lowercase the characters into a separate UString.
00949 
00950                 Casing is locale-dependent and context-sensitive. The 
00951                 result may be longer or shorter than the original. 
00952         
00953         ***********************************************************************/
00954 
00955         final UString toLower ()
00956         {
00957                 return this.toLower (ULocale.Default);  // locale-taking overload, default locale
00958         }
00959 
00960         /***********************************************************************
00961 
00962                 Lowercase the characters into a separate UString.
00963 
00964                 Casing is locale-dependent and context-sensitive. The 
00965                 result may be longer or shorter than the original.
00966         
00967         ***********************************************************************/
00968 
00969         final UString toLower (inout ULocale locale)
00970         {
00971                 // nested function capturing 'locale'; caseConvert receives its address
00972                 uint lowerInto (wchar* target, uint capacity, inout Error status)
00973                 {
00974                         return u_strToLower (target, capacity, content, len, toString(locale.name), status);
00975                 }
00976                 return caseConvert (&lowerInto);
00977         }
00978 
00979         /***********************************************************************
00980 
00981                 Uppercase the characters into a separate UString.
00982 
00983                 Casing is locale-dependent and context-sensitive. The 
00984                 result may be longer or shorter than the original.
00985 
00986         ***********************************************************************/
00987 
00988         final UString toUpper ()
00989         {
00990                 return this.toUpper (ULocale.Default);  // locale-taking overload, default locale
00991         }
00992 
00993         /***********************************************************************
00994 
00995                 Uppercase the characters into a separate UString.
00996 
00997                 Casing is locale-dependent and context-sensitive. The 
00998                 result may be longer or shorter than the original.
00999 
01000         ***********************************************************************/
01001 
01002         final UString toUpper (inout ULocale locale)
01003         {
01004                 // nested function capturing 'locale'; caseConvert receives its address
01005                 uint upperInto (wchar* target, uint capacity, inout Error status)
01006                 {
01007                         return u_strToUpper (target, capacity, content, len, toString(locale.name), status);
01008                 }
01009                 return caseConvert (&upperInto);
01010         }
01011 
01012         /***********************************************************************
01013         
01014                 Case-fold the characters into a separate UString.
01015 
01016                 Case-folding is locale-independent and not context-sensitive,
01017                 but there is an option for whether to include or exclude 
01018                 mappings for dotted I and dotless i that are marked with 'I' 
01019                 in CaseFolding.txt. The result may be longer or shorter than 
01020                 the original.
01021 
01022         ***********************************************************************/
01023 
01024         final UString toFolded (CaseOption option = CaseOption.Default)
01025         {
01026                 // nested function capturing 'option'; caseConvert receives its address
01027                 uint foldInto (wchar* target, uint capacity, inout Error status)
01028                 {
01029                         return u_strFoldCase (target, capacity, content, len, option, status);
01030                 }
01031                 return caseConvert (&foldInto);
01032         }
01033 
01034         /***********************************************************************
01035         
01036                 Unescape a string of characters and write the resulting
01037                 Unicode characters to the destination buffer.  The following 
01038                 escape sequences are recognized:
01039                 
01040                   \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
01041                   \\Uhhhhhhhh   8 hex digits
01042                   \\xhh         1-2 hex digits
01043                   \\x{h...}     1-8 hex digits
01044                   \\ooo         1-3 octal digits; o in [0-7]
01045                   \\cX          control-X; X is masked with 0x1F
01046                  
01047                 as well as the standard ANSI C escapes:
01048                  
01049                   \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
01050                   \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
01051                   \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
01052                  
01053                 Anything else following a backslash is generically escaped.  
01054                 For example, "[a\\-z]" returns "[a-z]".
01055                  
01056                 If an escape sequence is ill-formed, this method returns an 
01057                 empty string.  An example of an ill-formed sequence is "\\u" 
01058                 followed by fewer than 4 hex digits.
01059                  
01060          ***********************************************************************/
01061 
01062         final UString unEscape () 
01063         {
01064                 UString unescaped = new UString (len);
01065                 uint    pos = 0;
01066                 while (pos < len)
01067                       {
01068                       dchar ch = charAt (pos++);
01069                       if (ch == 0x005C)         // backslash introduces an escape
01070                          {
01071                          // pos goes in by address so the decoder can bump it
01072                          ch = u_unescapeAt (&_charAt, &pos, len, cast(void*) this);
01073                          if (ch == 0xFFFFFFFF)
01074                             {
01075                             // invalid escape sequence: return empty string
01076                             unescaped.truncate ();
01077                             break;
01078                             }
01079                          }
01080                       unescaped.append (ch);
01081                       }
01082                 return unescaped;
01083         }
01084 
01085         /***********************************************************************
01086         
01087                 Callback for C unescapeAt() function
01088 
01089         ***********************************************************************/
01090 
01091         extern (C)
01092         {
01093                 typedef wchar function (uint offset, void* context) CharAt;     // callback type taken by u_unescapeAt
01094                 // context is the UString that unEscape() passed in as cast(void*) this
01095                 private static wchar _charAt (uint offset, void* context)
01096                 {
01097                         return (cast(UString) context).charAt (offset);
01098                 }
01099         }
01100 
01101         /***********************************************************************
01102         
01103                 Pin the given index to a valid position.
01104 
01105         ***********************************************************************/
01106 
01107         final private void pinIndex (inout uint x)
01108         {
01109                 // clamp in place: anything beyond the valid content becomes len
01110                 x = (x > len) ? len : x;
01111         }
01112 
01113         /***********************************************************************
01114         
01115                 Pin the given index and length to a valid position.
01116 
01117         ***********************************************************************/
01118 
01119         final private void pinIndices (inout uint start, inout uint length)
01120         {
01121                 // clamp the start first; the room left for length depends on it
01122                 start = (start > len) ? len : start;
01123                 // start <= len now, so the subtraction cannot underflow
01124                 uint room = len - start;
01125                 length = (length > room) ? room : length;
01126         }
01127 
01128         /***********************************************************************
01129
01130                 Helper for comparison methods: case-compares s1 with s2 via
01131                 ICU and returns the signed (negative, zero, positive) result.
01132         ***********************************************************************/
01133
01134         final private int compareFolded (wchar[] s1, wchar[] s2, CaseOption option = CaseOption.Default)
01135         {
01136                 Error e;
01137                 // keep ICU's sign: a uint return would make "less than" look positive
01138                 int result = u_strCaseCompare (s1, s1.length, s2, s2.length, option, e);
01139                 testError (e, "compareFolded failed");  // checks the ICU status before the result is used
01140                 return result;
01141         }
01142 
01143         /***********************************************************************
01144         
01145                 Helper for conversion methods
01146 
01147         ***********************************************************************/
01148 
01149         final private UString caseConvert (UString.Formatter cvt)
01150         {
01151                 UString converted = new UString (len + 32);     // 32 extra units, presumably headroom for growth
01152                 converted.format (cvt, "case converter failed");
01153                 return converted;
01154         }
01155 
01156         /***********************************************************************
01157         
01158                 Bind the ICU functions from a shared library. This is
01159                 complicated by the issues regarding D and DLLs on the
01160                 Windows platform
01161 
01162         ***********************************************************************/
01163 
01164         version (Win32)
01165         {
01166                 private static void*    library;        // handle returned by FunctionLoader.bind
01167                 private static char[]   libraryName = "icuuc30.dll";     // presumably the ICU 3.0 common library -- confirm
01168 
01169                 /***************************************************************
01170                         ICU entry points, held as C-linkage function pointers
01171                 ***************************************************************/
01172 
01173                 private static extern (C) 
01174                 {
01175                         wchar* function (wchar*, uint, wchar*, uint) u_strFindFirst;
01176                         wchar* function (wchar*, uint, wchar*, uint) u_strFindLast;
01177                         wchar* function (wchar*, wchar, uint) u_memchr;
01178                         wchar* function (wchar*, wchar, uint) u_memrchr;
01179                         int    function (wchar*, uint, wchar*, uint, bool) u_strCompare;
01180                         int    function (wchar*, uint, wchar*, uint, uint, inout Error) u_strCaseCompare;
01181                         dchar  function (CharAt, uint*, uint, void*) u_unescapeAt;
01182                         uint   function (wchar*, uint) u_countChar32;
01183                         uint   function (wchar*, uint, wchar*, uint, char*, inout Error) u_strToUpper;
01184                         uint   function (wchar*, uint, wchar*, uint, char*, inout Error) u_strToLower;
01185                         uint   function (wchar*, uint, wchar*, uint, uint, inout Error) u_strFoldCase;
01186                 }
01187 
01188                 /***************************************************************
01189                         Pairs each pointer above with the symbol name to bind
01190                 ***************************************************************/
01191 
01192                 static  FunctionLoader.Bind[] targets = 
01193                         [
01194                         {cast(void**) &u_strFindFirst,      "u_strFindFirst"},
01195                         {cast(void**) &u_strFindLast,       "u_strFindLast"},
01196                         {cast(void**) &u_memchr,            "u_memchr"},
01197                         {cast(void**) &u_memrchr,           "u_memrchr"},
01198                         {cast(void**) &u_strCompare,        "u_strCompare"},
01199                         {cast(void**) &u_strCaseCompare,    "u_strCaseCompare"},
01200                         {cast(void**) &u_unescapeAt,        "u_unescapeAt"},
01201                         {cast(void**) &u_countChar32,       "u_countChar32"},
01202                         {cast(void**) &u_strToUpper,        "u_strToUpper"},
01203                         {cast(void**) &u_strToLower,        "u_strToLower"},
01204                         {cast(void**) &u_strFoldCase,       "u_strFoldCase"},
01205                         ];
01206 
01207                 /***************************************************************
01208                         Static constructor: bind every target from libraryName
01209                 ***************************************************************/
01210 
01211                 static this ()
01212                 {
01213                         library = FunctionLoader.bind (libraryName, targets);
01214                         //test ();
01215                 }
01216 
01217                 /***************************************************************
01218                         Static destructor: hand the library handle to unbind
01219                 ***************************************************************/
01220 
01221                 static ~this ()
01222                 {
01223                         FunctionLoader.unbind (library);
01224                 }
01225         }
01226 
01227         /***********************************************************************
01228         
01229         ***********************************************************************/
01230 
01231         private static void test()
01232         {
01233                 // smoke test: build a few strings and exercise a handful of operations
01234                 UString text  = new UString (r"aaaqw \uabcd eaaa", false);
01235                 UString copy  = new UString (text);
01236                 UString empty = new UString (new UText(""));
01237                 int found = text.indexOf ("qwe");
01238                 text.unEscape ();
01239                 text.toUpper ();
01240                 text.padLeading(2).padTrailing(2).trim();
01241         }
01242 }

Generated on Sun Nov 7 19:06:54 2004 for Mango by doxygen 1.3.6