Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

StringIndex.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file String.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032 
00033         class MutableString(T) : String!(T)
00034         {
00035                 T[] aliasOf ();
00036                 MutableString trim ();
00037                 MutableString append (int value);
00038                 MutableString append (long value);
00039                 MutableString append (double value);
00040                 MutableString append (char[] other);
00041                 MutableString append (wchar[] other);
00042                 MutableString append (dchar[] other);
00043                 MutableString append (T chr, int count=1);
00044                 MutableString append (String other, uint start=0, uint len=uint.max);
00045                 MutableString format (T[] format, ...);
00046                 MutableString layout (T[][] layout ...);
00047                 MutableString set    (T chr, uint index);
00048                 MutableString setTo  (T[] chars, bool mutable=true);
00049                 MutableString setTo  (String other, bool mutable=true);
00050                 MutableString setTo  (String other, uint start, uint len, bool mutable=true);
00051                 MutableString insert (T[] other, uint index=0);
00052                 MutableString insert (String other, uint index=0);
00053                 MutableString insert (T other, uint index=0, uint count=1);
00054                 MutableString remove (uint start, uint length=uint.max);
00055                 MutableString truncate (uint length=0);
00056         }
00057 
00058         class String(T) : UtfString
00059         {
00060                 opApply (int delegate(inout T) dg);
00061                 T get (uint index);
00062                 uint toHash ();
00063                 uint length ();
00064                 bool equals (T[] other);
00065                 bool equals (String other);
00066                 bool endsWith (T[] other);
00067                 bool endsWith (String other);
00068                 bool startsWith (T[] other);
00069                 bool startsWith (String other);
00070                 uint indexOf (T c, uint start=0);
00071                 uint indexOf (T[] chars, uint start=0);
00072                 uint indexOf (String other, uint start=0);
00073                 uint lastIndexOf (T c, uint start=uint.max);
00074                 uint lastIndexOf (T[] chars, uint start=uint.max);
00075                 uint lastIndexOf (String other, uint start=uint.max);
00076                 T[] copy (T[] dst, uint start=0, uint len=uint.max);
00077         }
00078 
00079         class UtfString
00080         {
00081                 abstract char[]  utf8  (char[]  dst = null);
00082                 abstract wchar[] utf16 (wchar[] dst = null);
00083                 abstract dchar[] utf32 (dchar[] dst = null);
00084         }
00085 
00086 
00087 
00088         @version        Initial version, December 2005
00089               
00090         @author         Kris
00091 
00092 *******************************************************************************/
00093 
00094 module mango.text.String;
00095 
00096 private import  mango.text.Text,
00097                 mango.text.Token;
00098 
00099 private import  mango.convert.Type,
00100                 mango.convert.Format,
00101                 mango.convert.Unicode;
00102 
00103 /*******************************************************************************
00104 
00105 *******************************************************************************/
00106 
00107 private extern (C) void memmove (void* dst, void* src, uint bytes);
00108 
00109 /*******************************************************************************
00110 
00111         MutableString is a string class that stores Unicode characters  
00112         and provides functionality similar to the Java String class.
00113 
00114         Indexes and offsets into and lengths of strings always count 
00115         code units, not code points. This is the same as with multi-byte 
00116         char* strings in traditional string handling. Operations on strings 
00117         typically do not test for code point boundaries. If necessary, the 
00118         user needs to take care of such boundaries by testing for the code 
00119         unit values.
00120 
00121         MutableString methods are lenient with regard to input parameter 
00122         values. In particular, if any provided indexes are out of bounds 
00123         (< 0 or > length) then they are "pinned" to the nearest boundary.
00124        
00125 *******************************************************************************/
00126 
00127 class MutableStringTemplate(T) : StringTemplate!(T)
00128 {
00129         private alias append                    opCat;
00130         private alias set                       opIndexAssign;       
00131         private alias MutableStringTemplate     MutableString;
00132 
00133         private alias FormatStructTemplate!(T)  Format;
00134         private alias Unicode.Into!(T)          Into;
00135 
00136         private Into                    into;           // unicode converter
00137         private T[]                     scratch;        // formatting scratchpad
00138         private T[]                     converts;       // unicode buffer
00139         private Format                  formatter;      // printf formatter
00140 
00141 
00142         /***********************************************************************
00143         
00144                 Create an empty MutableString with the specified available 
00145                 space. The buffer is sized to 'space' elements, the instance
00146                 is flagged as mutable, and setup() primes the formatter.
00147         ***********************************************************************/
00148 
00149         this (uint space = 0)
00150         {
00151                 content.length = space;
00152                 mutable = true;
00153                 setup ();
00154         }
00155 
00156         /***********************************************************************
00157         
00158                 Create a MutableString upon the provided content. If said 
00159                 content is immutable (read-only) then you might consider 
00160                 setting the 'mutable' parameter to false. Doing so will 
00161                 avoid allocating heap-space for the content until it is 
00162                 modified. When 'mutable' is true, setTo() duplicates the
00163                 content immediately. setup() then primes the formatter.
00164         ***********************************************************************/
00165 
00166         this (T[] content, bool mutable = true)
00167         {
00168                 setTo (content, mutable);
00169                 setup ();
00170         }
00171 
00172         /***********************************************************************
00173         
00174                 Create a MutableString via the content of a MutableString. 
00175                 If said content is immutable (read-only) then you might 
00176                 consider setting the 'mutable' parameter to false. Doing 
00177                 so will avoid allocating heap-space for the content until 
00178                 it is modified via MutableString methods.
00179                 Forwards other.get to the T[] constructor.
00180         ***********************************************************************/
00181         
00182         this (MutableString other, bool mutable = true)
00183         {
00184                 this (other.get, mutable);
00185         }
00186 
00187         /***********************************************************************
00188         
00189                 Create a MutableString via the content of a String. Note 
00190                 that the default is to assume the content is immutable,
00191                 in which case setTo() aliases it rather than duplicating it
00192         ***********************************************************************/
00193         
00194         this (String other, bool mutable = false)
00195         {
00196                 this (other.get, mutable);
00197         }
00198 
00199         /***********************************************************************
00200         
00201                 Return an alias to the content of this MutableString. This
00202                 simply hands back whatever get() returns.
00203         ***********************************************************************/
00204 
00205         T[] aliasOf ()
00206         {
00207                 return get ();
00208         }
00209 
00210         /***********************************************************************
00211         
00212                 Remove leading and trailing whitespace from this String.
00213                 Note that we slice the content to remove leading space.
00214                 Returns this MutableString so that mutators can be chained.
00215         ***********************************************************************/
00216 
00217         MutableString trim ()
00218         {
00219                 content = utils.trim (get());
00220                 len = content.length;
00221                 return this;
00222         }
00223 
00224         /***********************************************************************
00225         
00226                 Append an integer to this MutableString, using standard 
00227                 printf() notation. The value is handed to the formatter with
00228                 the optional 'format'. Returns this, for call chaining.
00229         ***********************************************************************/
00230 
00231         MutableString append (int v, T[] format=null)
00232         {
00233                 formatter (format, &v, v.sizeof, Type.Int);
00234                 return this;
00235         }
00236 
00237         /***********************************************************************
00238         
00239                 Append a long to this MutableString, using standard 
00240                 printf() notation. The value is handed to the formatter with
00241                 the optional 'format'. Returns this, for call chaining.
00242         ***********************************************************************/
00243 
00244         MutableString append (long v, T[] format=null)
00245         {
00246                 formatter (format, &v, v.sizeof, Type.Long);
00247                 return this;
00248         }
00249 
00250         /***********************************************************************
00251         
00252                 Append a double to this MutableString, using standard 
00253                 printf() notation. The value is handed to the formatter with
00254                 the optional 'format'. Returns this, for call chaining.
00255         ***********************************************************************/
00256 
00257         MutableString append (double v, T[] format=null)
00258         {
00259                 formatter (format, &v, v.sizeof, Type.Double);
00260                 return this;
00261         }
00262 
00263         /***********************************************************************
00264         
00265                 Append text to this MutableString. The text is passed to
00266                 convert() tagged as Type.Utf8. Returns this, for chaining.
00267         ***********************************************************************/
00268 
00269         MutableString append (char[] chars)
00270         {
00271                 convert (chars, Type.Utf8);
00272                 return this;
00273         }
00274 
00275         /***********************************************************************
00276         
00277                 Append text to this MutableString. The text is passed to
00278                 convert() tagged as Type.Utf16. Returns this, for chaining.
00279         ***********************************************************************/
00280 
00281         MutableString append (wchar[] chars)
00282         {
00283                 convert (chars, Type.Utf16);
00284                 return this;
00285         }
00286 
00287         /***********************************************************************
00288         
00289                 Append text to this MutableString. The text is passed to
00290                 convert() tagged as Type.Utf32. Returns this, for chaining.
00291         ***********************************************************************/
00292 
00293         MutableString append (dchar[] chars)
00294         {
00295                 convert (chars, Type.Utf32);
00296                 return this;
00297         }
00298 
00299         /***********************************************************************
00300                 Append a count of characters to this MutableString. Nothing
00301                 is appended when the count is zero or negative.
00302         ***********************************************************************/
00303         MutableString append (T chr, int count=1)
00304         {
00305                 if (count <= 0)         // a negative count would wrap 'len'
00306                     return this;
00307 
00308                 expand (count);
00309                 len += count;
00310                 return set (chr, len-count, count);
00311         }
00311 
00312         /***********************************************************************
00313         
00314                 Append partial text to this MutableString. The 'start' and
00315                 'len' arguments are first adjusted by other.pinIndices().
00316         ***********************************************************************/
00317 
00318         MutableString append (String other, uint start=0, uint len=uint.max)
00319         {
00320                 other.pinIndices (start, len);
00321                 return append (other.content [start..start+len]);
00322         }
00323 
00324         /**********************************************************************
00325 
00326                 Format a set of arguments using the standard printf()
00327                 formatting notation. The variadic arguments are forwarded
00328                 to the formatter as-is. Returns this, for call chaining.
00329         **********************************************************************/
00330 
00331         MutableString format (T[] fmt, ...)
00332         {
00333                 formatter (fmt, _arguments, _argptr);
00334                 return this;
00335         }
00336 
00337         /***********************************************************************
00338                 
00339                 Set a section of this MutableString to the specified 
00340                 character. The section is first adjusted by pinIndices(),
00341                 and aliased (non-mutable) content is reallocated beforehand.
00342         ***********************************************************************/
00343 
00344         MutableString set (T chr, uint start=0, uint count=1)
00345         {
00346                 pinIndices (start, count);
00347                 if (! mutable)
00348                       realloc ();
00349                 content [start..start+count] = chr;
00350                 return this;
00351         }
00352 
00353         /***********************************************************************
00354    
00355                 Replace the content with the provided array. When 'mutable'
00356                 is true a private duplicate is taken; otherwise the array 
00357                 is aliased until such time as the content is altered.
00358                      
00359         ***********************************************************************/
00360 
00361         MutableString setTo (T[] chars, bool mutable = true)
00362         {
00363                 this.mutable = mutable;
00364 
00365                 // duplicate only when we are permitted to modify the content
00366                 content = mutable ? chars.dup : chars;
00367                 len = chars.length;
00368                 return this;
00369         }
00370 
00371         /***********************************************************************
00372         
00373                 Replace the content of this MutableString. If the new content
00374                 is immutable (read-only) then you might consider setting the
00375                 'mutable' parameter to false. Doing so will avoid allocating
00376                 heap-space for the content until it is modified via one of
00377                 these methods. Forwards other.get to the T[] overload.
00378 
00379         ***********************************************************************/
00380 
00381         MutableString setTo (String other, bool mutable = true)
00382         {
00383                 return setTo (other.get, mutable);
00384         }
00385 
00386         /***********************************************************************
00387         
00388                 Replace the content of this MutableString with a section of
00389                 another String, where 'start' and 'count' are first adjusted
00390                 by other.pinIndices(). If the new content is immutable
00391                 (read-only) then you might consider setting the 'mutable'
00392                 parameter to false, which aliases the section instead of
00393                 duplicating it.
00394         ***********************************************************************/
00395 
00396         MutableString setTo (String other, uint start, uint count, bool mutable = true)
00397         {
00398                 other.pinIndices (start, count);
00399                 return setTo (other.content [start..start+count], mutable);
00400         }
00401 
00402         /***********************************************************************
00403                 Insert characters into this MutableString. Only the index is
00404                 pinned: 'count' is the number of characters to add, and so
00405                 must not be limited by the amount of existing content.
00406         ***********************************************************************/
00407 
00408         MutableString insert (T chr, uint index=0, uint count=1)
00409         {
00410                 pinIndex (index);
00411                 expand (index, count);
00412                 return set (chr, index, count);
00413         }
00414 
00415         /***********************************************************************
00416         
00417                 Insert text into this MutableString. The index is pinned, a
00418                 gap of other.length is opened there, and 'other' is copied in
00419         ***********************************************************************/
00420 
00421         MutableString insert (T[] other, uint index=0)
00422         {
00423                 pinIndex (index);
00424                 expand (index, other.length);
00425                 content[index..index+other.length] = other;
00426                 return this;
00427         }
00428 
00429         /***********************************************************************
00430         
00431                 Insert another String into this MutableString. Forwards
00432                 other.get to the T[] overload.
00433         ***********************************************************************/
00434 
00435         MutableString insert (String other, uint index=0)
00436         {       
00437                 return insert (other.get, index);
00438         }
00439 
00440         /***********************************************************************
00441         
00442                 Remove a piece of this MutableString. The section is pinned
00443                 first; the tail is then moved down over the removed part.
00444         ***********************************************************************/
00445 
00446         MutableString remove (uint start, uint length=uint.max)
00447         {
00448                 pinIndices (start, length);
00449                 if (length)
00450                     if (start >= len)
00451                         truncate (start);
00452                     else
00453                        {
00454                        if (! mutable)
00455                              realloc ();
00456                        // use pointers: 'i' may equal content.length
00457                        uint i = start + length;
00458                        memmove (content.ptr+start, content.ptr+i, (len-i) * T.sizeof);
00459                        len -= length;
00460                        }
00461                 return this;
00462         }
00463 
00464         /***********************************************************************
00465         
00466                 Shorten this MutableString to the given length. A length
00467                 beyond the current one leaves the content untouched.
00468         ***********************************************************************/
00469 
00470         MutableString truncate (uint length=0)
00471         {
00472                 if (len > length)
00473                     len = length;
00474                 return this;
00475         }
00476 
00477         /**********************************************************************
00478 
00479                 Arranges text strings in order, using indices to specify 
00480                 where each particular argument should be positioned within 
00481                 the text. This is handy for collating I18N components.
00482 
00483                 @code
00484                 auto string = new MutableString;
00485 
00486                 string.layout ("%2 %1", "one", "two");
00487                 @endcode
00488 
00489                 The index numbers range from one through nine. Any other
00490                 character after a '%' is appended as-is ("%%" yields '%')
00491         **********************************************************************/
00492 
00493         MutableString layout (T[][] layout ...)
00494         {
00495                 int     args;
00496                 bool    state;
00497 
00498                 args = layout.length - 1;
00499                 foreach (T c; layout[0])
00500                         {
00501                         if (state)
00502                            {
00503                            state = false;
00504                            if (c >= '1' && c <= '9')    // was '||': always true
00505                               {
00506                               uint index = c - '0';
00507                               if (index <= args)
00508                                  {
00509                                  append (layout[index]);
00510                                  continue;
00511                                  }
00512                               else
00513                                  formatter.error ("TextLayout : invalid argument");
00514                               }
00515                            }
00516                         else
00517                            if (c == '%')
00518                               {
00519                               state = true;
00520                               continue;
00521                               }
00522                         append (c);
00523                         }
00524                 return this;
00525         }
00526 
00527         /***********************************************************************
00528         
00529                 Ensure there is room for 'count' more elements, reallocating 
00530                 when the content is aliased or the buffer is too small.
00531 
00532         ***********************************************************************/
00533 
00534         private final void expand (uint count)
00535         {
00536                 bool fits = mutable && (len + count) <= content.length;
00537                 if (! fits) realloc (count);
00538         }
00539 
00540         /***********************************************************************
00541                 make room available to insert something: grow as needed,
00542                 then shift the tail up by 'count'. Pointer arithmetic is used
00543                 since index+count may legitimately equal content.length
00544         ***********************************************************************/
00545 
00546         private final void expand (uint index, uint count)
00547         {
00548                 expand (count);
00549                 memmove (content.ptr+index+count, content.ptr+index, (len - index) * T.sizeof);
00550                 len += count;                
00551         }
00552 
00553         /***********************************************************************
00554         
00555                 Allocate memory due to a change in the content. We handle 
00556                 the distinction between mutable and immutable here: aliased
00557                 content is copied to a fresh buffer, which becomes mutable.
00558         ***********************************************************************/
00559 
00560         private final void realloc (uint count = 0)
00561         {
00562                 uint size = (content.length + count + 127) & ~127;
00563                 
00564                 if (mutable)
00565                     content.length = size;
00566                 else
00567                    {
00568                    mutable = true;
00569                    T[] x = content;
00570                    content = new T[size];
00571                    if (len)
00572                        content[0..len] = x[0..len];  // x may be longer than len
00573                    }
00574         }
00575 
00576         /***********************************************************************
00577         
00578                 Internal method to support MutableString appending. Makes
00579                 room for 'count' elements, then copies them from 'chars'.
00580         ***********************************************************************/
00581 
00582         private final MutableString append (T* chars, uint count)
00583         {
00584                 expand (count);
00585                 content[len..len+count] = chars[0..count];
00586                 len += count;
00587                 return this;
00588         }
00589 
00590         /***********************************************************************
00591         
00592                 Initialize this MutableString. Allocate conversion buffers
00593                 and prime the formatter, which is given convert() as its
00594                 callback along with the scratch buffer and 'df'.
00595         ***********************************************************************/
00596 
00597         private void setup (Format.DblFormat df = null)
00598         {
00599                 
00600                 scratch  = new T[64];
00601                 converts = new T[256];
00602                 formatter.ctor (&convert, null, scratch, df);                
00603         }
00604 
00605         /**********************************************************************
00606 
00607                 Support for the formatter, to convert from one encoding
00608                 to another. The converted text is appended to this string,
00609                 and the number of elements appended is returned.
00610         **********************************************************************/
00611 
00612         private uint convert (void[] v, uint type)   
00613         {
00614                 // convert as required
00615                 auto s = cast(T[]) into.convert (v, type, converts);
00616                         
00617                 // hang onto conversion buffer when it grows
00618                 if (s.length > converts.length)
00619                     converts = s;
00620 
00621                 // append to string
00622                 append (s.ptr, s.length);
00623                 return s.length;
00624         }
00625 
00626 
00627 /+
00628         // freelist support
00629 //        private MutableString           next;   
00630 //        private static uint             count;
00631 //        private static MutableString    freelist;
00632 
00633         /***********************************************************************
00634         
00635                 return an instance of MutableString. This should be used 
00636                 for high volume instantiation, since it can avoid memory
00637                 allocation.
00638 
00639         ***********************************************************************/
00640 
00641         static final synchronized MutableString create ()
00642         {
00643                 MutableString s;
00644 
00645                 if (freelist)
00646                    {
00647                    --count;
00648                    s = freelist;
00649                    freelist = s.next;
00650                    }  
00651                 else
00652                    s = new MutableString;  
00653                 return s;
00654         }
00655 
00656         /***********************************************************************
00657         
00658                 return an instance of MutableString to the freelist, so 
00659                 that create() may reuse it. At most MaxCache instances
00660                 are retained, and the instance content is released first.
00661 
00662         ***********************************************************************/
00663 
00664         private static synchronized void deallocate (MutableString s)
00665         {
00666                 if (count < MaxCache)
00667                    {
00668                    s.len = 0;
00669                    s.content = null;
00670                    s.next = freelist;
00671                    freelist = s;
00672                    ++count;
00673                    }
00674         }
00675 +/
00676 }
00677 
00678 
00679 
00680 /*******************************************************************************
00681 
00682         Immutable string.
00683 
00684         Note that there's a DMD 141 bug whereby the implementation of
00685         ConvertingString somehow gets hidden if you move the copy() 
00686         method above the utf() methods! Shoving copy() right at the
00687         end seems to resolve it for now :-(
00688 
00689 *******************************************************************************/
00690 
00691 class StringTemplate(T) : UtfString
00692 {
00693         protected alias get             opIndex;
00694         protected alias StringTemplate  String;
00695 
00696         // the core of the String and MutableString attributes. The name 'len'
00697         // is used rather than the more obvious 'length' since there is
00698         // a collision with the silly array[length] syntactic sugar ...
00699         protected uint                  len;
00700         package T[]                     content;
00701 
00702         protected Unicode.From!(T)      from;
00703         protected TextTemplate!(T)      utils;
00704 
00705         // this should probably be in MutableString only, but there seems to 
00706         // be a compiler bug where it doesn't get initialised correctly,
00707         // and it's perhaps useful to have here for when a MutableString is
00708         // passed as a String argument.
00709         protected bool                  mutable;
00710 
00711         /***********************************************************************
00712         
00713                 Hidden (private) constructor, which initialises nothing
00714 
00715         ***********************************************************************/
00716 
00717         private this ()
00718         {
00719         }
00720 
00721         /***********************************************************************
00722         
00723                 Construct read-only wrapper around the given content. The
00724                 array is referenced as-is, and len is set to its length.
00725         ***********************************************************************/
00726 
00727         this (T[] content)
00728         {
00729                 this.content = content;
00730                 this.len = content.length;
00731         }
00732 
00733         /***********************************************************************
00734         
00735                 Return the character at the specified position. An index at
00736                 or beyond the valid length is reported via error()
00737         ***********************************************************************/
00738 
00739         final T get (uint index)
00740         {
00741                 if (index >= len)
00742                     error ("index out of bounds"); 
00743                 return content [index];
00744         }
00745 
00746         /***********************************************************************
00747         
00748                 Hash this String. Only the valid content (the first 'len'
00749                 elements) is passed to hash()
00750         ***********************************************************************/
00751 
00752         final override uint toHash ()
00753         {
00754                 return hash (content[0..len]);
00755         }
00756 
00757         /***********************************************************************
00758         
00759                 Return the length of the valid content
00760 
00761         ***********************************************************************/
00762 
00763         final uint length ()
00764         {
00765                 return this.len;        // number of valid elements, as sliced by get()
00766         }
00767 
00768         /***********************************************************************
00769         
00770                 Is this String equal to another?
00771 
00772         ***********************************************************************/
00773 
00774         final bool equals (String other)
00775         {
00776                 // identity needs no element-wise check; otherwise compare
00777                 // against the other String's valid content
00778                 return (other is this) || equals (other.get());
00779         }
00780 
00781         /***********************************************************************
00782         
00783                 Is this String equal to the provided text?
00784 
00785         ***********************************************************************/
00786 
00787         final bool equals (T[] other)
00788         {
00789                 if (other.length != len)
00790                     return false;       // differing lengths can never match
00791                 return utils.equal (other.ptr, content.ptr, len);
00792         }
00793 
00794         /***********************************************************************
00795         
00796                 Does this String end with specified string?
00797 
00798         ***********************************************************************/
00799 
00800         final bool endsWith (String other)
00801         {
00802                 return endsWith (other.get());  // defer to the T[] overload
00803         }
00804 
00805         /***********************************************************************
00806         
00807                 Does this String end with specified string?
00808 
00809         ***********************************************************************/
00810 
00811         final bool endsWith (T[] chars)
00812         {
00813                 if (chars.length > len)
00814                     return false;       // a suffix longer than the content cannot match
00815                 return utils.equal (content[len-chars.length .. len].ptr, chars.ptr, chars.length);
00816         }
00817 
00818         /***********************************************************************
00819         
00820                 Does this String start with specified string?
00821 
00822         ***********************************************************************/
00823 
00824         final bool startsWith (String other)
00825         {
00826                 return startsWith (other.get());        // defer to the T[] overload
00827         }
00828 
00829         /***********************************************************************
00830         
00831                 Does this String start with specified string?
00832 
00833         ***********************************************************************/
00834 
00835         final bool startsWith (T[] chars)
00836         {
00837                 if (chars.length > len)
00838                     return false;       // a prefix longer than the content cannot match
00839                 return utils.equal (content.ptr, chars.ptr, chars.length);
00840         }
00841 
00842         /***********************************************************************
00843         
00844                 Find the first occurrence of a BMP code point in a string.
00845                 A surrogate code point is found only if its match in the 
00846                 text is not part of a surrogate pair.
00847 
00848         ***********************************************************************/
00849 
00850         final uint indexOf (T c, uint start=0)
00851         {
00852                 return utils.indexOf (get(), c, start);         // search the valid content only
00853         }
00854 
00855         /***********************************************************************
00856         
00857                 Find the first occurrence of a substring in a string. 
00858 
00859                 The substring is found at code point boundaries. That means 
00860                 that if the substring begins with a trail surrogate or ends 
00861                 with a lead surrogate, then it is found only if these 
00862                 surrogates stand alone in the text. Otherwise, the substring 
00863                 edge units would be matched against halves of surrogate pairs.
00864 
00865         ***********************************************************************/
00866 
00867         final uint indexOf (String other, uint start=0)
00868         {
00869                 return indexOf (other.get(), start);    // defer to the T[] overload
00870         }
00871 
00872         /***********************************************************************
00873         
00874                 Find the first occurrence of a substring in a string. 
00875 
00876                 The substring is found at code point boundaries. That means 
00877                 that if the substring begins with a trail surrogate or ends 
00878                 with a lead surrogate, then it is found only if these 
00879                 surrogates stand alone in the text. Otherwise, the substring 
00880                 edge units would be matched against halves of surrogate pairs.
00881 
00882         ***********************************************************************/
00883 
00884         final uint indexOf (T[] chars, uint start=0)
00885         {
00886                 return utils.indexOf (get(), chars, start);     // search the valid content only
00887         }
00888 
00889         /***********************************************************************
00890         
00891                 Find the last occurrence of a BMP code point in a string.
00892                 A surrogate code point is found only if its match in the 
00893                 text is not part of a surrogate pair.
00894 
00895         ***********************************************************************/
00896 
00897         final uint lastIndexOf (T c, uint start=uint.max)       // stub: every call throws
00898         {
00899                 error ("lastIndexOf() currently unimplemented");        // error() always throws
00900                 pinIndex (start);       // never reached: error() above does not return
00901                 return -1;              // never reached either
00902         }
00903 
00904         /***********************************************************************
00905         
00906                 Find the last occurrence of a substring in a string.
00907                 The substring is supplied as a String rather than as a 
00908                 T[] array.
00909 
00910         ***********************************************************************/
00911 
00912         final uint lastIndexOf (String other, uint start=uint.max)      // stub: every call throws
00913         {
00914                 error ("lastIndexOf() currently unimplemented");        // error() always throws
00915                 return lastIndexOf (other.get, start);  // never reached: error() above does not return
00916         }
00917 
00918         /***********************************************************************
00919         
00920                 Find the last occurrence of a substring in a string. 
00921 
00922                 The substring is found at code point boundaries. That means 
00923                 that if the substring begins with a trail surrogate or ends 
00924                 with a lead surrogate, then it is found only if these 
00925                 surrogates stand alone in the text. Otherwise, the substring 
00926                 edge units would be matched against halves of surrogate pairs.
00927 
00928         ***********************************************************************/
00929 
00930         final uint lastIndexOf (T[] chars, uint start=uint.max) // stub: every call throws
00931         {
00932                 error ("lastIndexOf() currently unimplemented");        // error() always throws
00933                 pinIndex (start);       // never reached: error() above does not return
00934                 return -1;              // never reached either
00935         }
00936 
00937         /***********************************************************************
00938         
00939                 Return content from this String 
00940                 
00941                 A slice of dst is returned, representing a copy of the 
00942                 content. The slice is clipped to the minimum of either 
00943                 the length of the provided array, or the length of the 
00944                 content minus the stipulated start point
00945 
00946         ***********************************************************************/
00947 
00948         final T[] copy (T[] dst, uint start=0, uint length=uint.max)    // returns the filled slice of dst
00949         {
00950                 pinIndices (start, length);     // now start <= len and length <= len - start
00951                 uint i = length;                // length is already a count: subtracting start was wrong
00952                 if (i > dst.length)
00953                     i = dst.length;             // never write beyond the caller's buffer
00954 
00955                 dst [0..i] = content [start..start+i];
00956                 return dst [0..i];
00957         }
00958 /+
00959         /***********************************************************************
00960         
00961                 Copy a section of this String into a MutableString
00962 
00963         ***********************************************************************/
00964 
00965         MutableStringTemplate!(T) copy (MutableStringTemplate!(T) dst, uint start=0, uint len=uint.max)
00966         {
00967                 pinIndices (start, len);
00968                 return dst.setTo (content[start..start+len], true);
00969         }
00970 +/
00971         /***********************************************************************
00972         
00973                 Bind this string to a token
00974 
00975         ***********************************************************************/
00976 
00977         final void bind (TokenTemplate!(T) token)
00978         {
00979                 token.prime (content [0..len]);         // hand the valid content to the token
00980         }
00981 
00982         /***********************************************************************
00983 
00984                 Convert to the AbstractString types. The optional argument
00985                 dst will be resized as required to house the conversion. 
00986                 To minimize heap allocation, use the following pattern:
00987 
00988                         String  string;
00989 
00990                         wchar[] buffer;
00991                         wchar[] result = string.utf16 (buffer);
00992 
00993                         if (result.length > buffer.length)
00994                             buffer = result;
00995 
00996                You can also provide a buffer from the stack, but the output 
00997                will be moved to the heap if said buffer is not large enough
00998 
00999         ***********************************************************************/
01000 
01001         char[] utf8 (char[] dst = null)
01002         {       // dst is optional and is handed on to the converter
01003                 return cast(char[]) from.convert (content [0..len], Type.Utf8, dst);
01004         }
01005 
01006         wchar[] utf16 (wchar[] dst = null)
01007         {       // dst is optional and is handed on to the converter
01008                 return cast(wchar[]) from.convert (content [0..len], Type.Utf16, dst);
01009         }
01010 
01011         dchar[] utf32 (dchar[] dst = null)
01012         {       // dst is optional and is handed on to the converter
01013                 return cast(dchar[]) from.convert (content [0..len], Type.Utf32, dst);
01014         }
01015 
01016         /**********************************************************************
01017 
01018                 Iterate over the characters in this string. Note that 
01019                 this is a read-only foreach ~ the worst a user can
01020                 do is alter the temporary 'c'
01021 
01022         **********************************************************************/
01023 
01024         int opApply (int delegate(inout T) dg)
01025         {
01026                 int rc   = 0;           // first non-zero delegate result, if any
01027                 T[] text = get();       // the valid content, fetched once
01028 
01029                 // stop as soon as the delegate answers non-zero; it only ever sees a copy
01030                 for (uint i = 0; rc == 0 && i < text.length; ++i)
01031                      { T c = text [i]; rc = dg (c); }
01032                 return rc;
01033         }
01033 
01034         /***********************************************************************
01035         
01036                 Compare this String to another
01037 
01038         ***********************************************************************/
01039 
01040         final override int opCmp (Object o)     // <0, 0 or >0: ordered element by element, then by length
01041         {
01042                 auto other = cast (String) o;
                      if (other is null)
                          return 1;               // not a String: unequal, same answer as before
                      if (other is this)
                          return 0;

                      T[]  lhs = get();
                      T[]  rhs = other.get();
                      uint n   = (lhs.length < rhs.length) ? lhs.length : rhs.length;

                      // the first differing position decides the order
                      for (uint i = 0; i < n; ++i)
                           if (lhs[i] != rhs[i])
                               return (lhs[i] < rhs[i]) ? -1 : 1;

                      // one is a prefix of the other: the shorter one sorts first
                      if (lhs.length != rhs.length)
                          return (lhs.length < rhs.length) ? -1 : 1;
                      return 0;
01043         }
01044 
01045         /***********************************************************************
01046         
01047                 Is this String equal to another?
01048 
01049         ***********************************************************************/
01050 
01051         final override int opEquals (Object o)
01052         {
01053                 auto peer = cast (String) o;
01054 
01055                 // the cast yields null for anything that is not a String 
01056                 // (a null argument included), which is reported as unequal;
01057                 // every other case is decided by equals()
01058                 return (peer is null) ? 0 : equals (peer);
01059         }
01060 
01061         /**********************************************************************
01062 
01063             hash() -- hash a variable-length key into a 32-bit value
01064 
01065               x     : the key (the unaligned variable-length array of bytes);
01066                       its length, counting by bytes, is taken from x.length
01067               c     : the initial value (level); can be any 4-byte value
01068 
01069             Returns a 32-bit value.  Every bit of the key affects every bit of
01070             the return value.  Every 1-bit and 2-bit delta achieves avalanche.
01071 
01072             About 4.3*len + 80 X86 instructions, with excellent pipelining
01073 
01074             The best hash table sizes are powers of 2.  There is no need to do
01075             mod a prime (mod is sooo slow!).  If you need less than 32 bits,
01076             use a bitmask.  For example, if you need only 10 bits, do
01077 
01078                         h = (h & hashmask(10));
01079 
01080             In which case, the hash table should have hashsize(10) elements.
01081             If you are hashing n strings (ub1 **)k, do it like this:
01082 
01083                         for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
01084 
01085             By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use 
01086             this code any way you wish, private, educational, or commercial.  
01087             It's free.
01088             
01089             See http://burtleburtle.net/bob/hash/evahash.html
01090             Use for hash table lookup, or anything where one collision in 2^32 
01091             is acceptable. Do NOT use for cryptographic purposes.
01092 
01093         **********************************************************************/
01094 
01095         static final uint hash (void[] x, uint c = 0)   // x: the key bytes; c: starting value, also the result accumulator
01096         {
01097             uint    a,
01098                     b;
01099 
01100             a = b = 0x9e3779b9;         // NOTE(review): presumably the golden-ratio seed of Jenkins' original ~ confirm
01101 
01102             uint len = x.length;        // bytes still to be consumed
01103             ubyte* k = cast(ubyte *) x.ptr;     // byte cursor into the key
01104 
01105             // handle most of the key 
01106             while (len >= 12) 
01107                   {
01108                   a += *cast(uint *)(k+0);      // each read takes 4 bytes straight from memory
01109                   b += *cast(uint *)(k+4);
01110                   c += *cast(uint *)(k+8);
01111 
01112                   a -= b; a -= c; a ^= (c>>13); 
01113                   b -= c; b -= a; b ^= (a<<8); 
01114                   c -= a; c -= b; c ^= (b>>13); 
01115                   a -= b; a -= c; a ^= (c>>12);  
01116                   b -= c; b -= a; b ^= (a<<16); 
01117                   c -= a; c -= b; c ^= (b>>5); 
01118                   a -= b; a -= c; a ^= (c>>3);  
01119                   b -= c; b -= a; b ^= (a<<10); 
01120                   c -= a; c -= b; c ^= (b>>15); 
01121                   k += 12; len -= 12;           // step past the 12 bytes just mixed
01122                   }
01123 
01124             // handle the last 11 bytes 
01125             c += x.length;              // fold in the total key length
01126             switch (len)                // 0..11 bytes remain; each case falls through to those below it
01127                    {
01128                    case 11: c+=(cast(uint)k[10]<<24);
01129                    case 10: c+=(cast(uint)k[9]<<16);
01130                    case 9 : c+=(cast(uint)k[8]<<8);
01131                    case 8 : b+=(cast(uint)k[7]<<24);
01132                    case 7 : b+=(cast(uint)k[6]<<16);
01133                    case 6 : b+=(cast(uint)k[5]<<8);
01134                    case 5 : b+=k[4];
01135                    case 4 : a+=(cast(uint)k[3]<<24);
01136                    case 3 : a+=(cast(uint)k[2]<<16);
01137                    case 2 : a+=(cast(uint)k[1]<<8);
01138                    case 1 : a+=k[0];
01139                    default:             // no bytes left to add
01140                    }
01141 
01142             a -= b; a -= c; a ^= (c>>13);       // final round: same nine mixing steps as inside the loop
01143             b -= c; b -= a; b ^= (a<<8); 
01144             c -= a; c -= b; c ^= (b>>13); 
01145             a -= b; a -= c; a ^= (c>>12);  
01146             b -= c; b -= a; b ^= (a<<16); 
01147             c -= a; c -= b; c ^= (b>>5); 
01148             a -= b; a -= c; a ^= (c>>3);  
01149             b -= c; b -= a; b ^= (a<<10); 
01150             c -= a; c -= b; c ^= (b>>15); 
01151 
01152             return c;
01153         }
01154 
01155         /***********************************************************************
01156         
01157                 Throw an exception
01158 
01159         ***********************************************************************/
01160 
01161         package final void error (char[] msg)   // never returns normally: always throws
01162         {
01163                 static class TextException : Exception  // exception type declared locally inside this function
01164                 {
01165                         this (char[] msg)
01166                         {
01167                                 super (msg);    // msg becomes the Exception's message
01168                         }
01169                 }
01170 
01171                 throw new TextException (msg);
01172         }
01173 
01174         /***********************************************************************
01175         
01176                 Return the valid content from this String
01177 
01178         ***********************************************************************/
01179 
01180         package final T[] get ()
01181         {
01182                 return this.content [0 .. this.len];    // a slice of the first len elements, not a copy
01183         }
01184 
01185         /***********************************************************************
01186         
01187                 Pin the given index to a valid position.
01188 
01189         ***********************************************************************/
01190 
01191         package final void pinIndex (inout uint x)
01192         {
01193                 if (len < x)
01194                     x = len;    // clamp in place; x == len remains acceptable
01195         }
01196 
01197         /***********************************************************************
01198         
01199                 Pin the given index and length to a valid position.
01200 
01201         ***********************************************************************/
01202 
01203         package final void pinIndices (inout uint start, inout uint length)
01204         {
01205                 if (len < start)
01206                     start = len;        // clamp the start point first
01207                 uint room = len - start;        // elements available from start onwards
01208                 if (room < length)
01209                     length = room;      // length is a count, limited to what remains
01210         }
01211 }       
01212 
01213 
01214 /*******************************************************************************
01215 
01216         A string that converts to anything
01217 
01218 *******************************************************************************/
01219 
01220 class UtfString
01221 {
01222         abstract char[]  utf8  (char[]  dst = null);    // content as char[]; dst is an optional argument (defaults to null)
01223 
01224         abstract wchar[] utf16 (wchar[] dst = null);    // content as wchar[]; dst is an optional argument (defaults to null)
01225 
01226         abstract dchar[] utf32 (dchar[] dst = null);    // content as dchar[]; dst is an optional argument (defaults to null)
01227 }
01228 
01229 
01230 
01231 alias StringTemplate!(char) Utf8String;                 // the char instantiation of StringTemplate
01232 alias MutableStringTemplate!(char) Utf8MutableString;   // the char instantiation of MutableStringTemplate
01233 

Generated on Sat Dec 24 17:28:33 2005 for Mango by  doxygen 1.4.0