00001 /******************************************************************************* 00002 00003 @file String.d 00004 00005 Copyright (c) 2004 Kris Bell 00006 00007 This software is provided 'as-is', without any express or implied 00008 warranty. In no event will the authors be held liable for damages 00009 of any kind arising from the use of this software. 00010 00011 Permission is hereby granted to anyone to use this software for any 00012 purpose, including commercial applications, and to alter it and/or 00013 redistribute it freely, subject to the following restrictions: 00014 00015 1. The origin of this software must not be misrepresented; you must 00016 not claim that you wrote the original software. If you use this 00017 software in a product, an acknowledgment within documentation of 00018 said product would be appreciated but is not required. 00019 00020 2. Altered source versions must be plainly marked as such, and must 00021 not be misrepresented as being the original software. 00022 00023 3. This notice may not be removed or altered from any distribution 00024 of the source. 00025 00026 4. Derivative works are permitted, but they must carry this notice 00027 in full and credit the original source. 
00028 00029 00030 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00031 00032 00033 class MutableString(T) : String!(T) 00034 { 00035 T[] aliasOf (); 00036 MutableString trim (); 00037 MutableString append (int value); 00038 MutableString append (long value); 00039 MutableString append (double value); 00040 MutableString append (char[] other); 00041 MutableString append (wchar[] other); 00042 MutableString append (dchar[] other); 00043 MutableString append (T chr, int count=1); 00044 MutableString append (String other, uint start=0, uint len=uint.max); 00045 MutableString format (T[] format, ...); 00046 MutableString layout (T[] layout ...); 00047 MutableString set (T chr, uint index); 00048 MutableString setTo (T[] chars, bool mutable=true); 00049 MutableString setTo (String other, bool mutable=true); 00050 MutableString setTo (String other, uint start, uint len, bool mutable=true); 00051 MutableString insert (T[] other, uint index=0); 00052 MutableString insert (String other, uint index=0); 00053 MutableString insert (T other, uint index=0, uint count=1); 00054 MutableString remove (uint start, uint length=uint.max); 00055 MutableString truncate (uint length=0); 00056 } 00057 00058 class String(T) : UtfString 00059 { 00060 opApply (int delegate(inout T) dg); 00061 T get (uint index); 00062 uint toHash (); 00063 uint length (); 00064 bool equals (T[] other); 00065 bool equals (String other); 00066 bool endsWith (T[] other); 00067 bool endsWith (String other); 00068 bool startsWith (T[] other); 00069 bool startsWith (String other); 00070 uint indexOf (T c, uint start=0); 00071 uint indexOf (T[] chars, uint start=0); 00072 uint indexOf (String other, uint start=0); 00073 uint lastIndexOf (T c, uint start=uint.max); 00074 uint lastIndexOf (T[] chars, uint start=uint.max); 00075 uint lastIndexOf (String other, uint start=uint.max); 00076 T[] copy (T[] dst, uint start=0, uint len=uint.max); 00077 } 00078 00079 class UtfString 00080 { 00081 abstract char[] utf8 (char[] dst = null); 
00082 abstract wchar[] utf16 (wchar[] dst = null); 00083 abstract dchar[] utf32 (dchar[] dst = null); 00084 } 00085 00086 00087 00088 @version Initial version, December 2005 00089 00090 @author Kris 00091 00092 *******************************************************************************/ 00093 00094 module mango.text.String; 00095 00096 private import mango.text.Text, 00097 mango.text.Token; 00098 00099 private import mango.convert.Type, 00100 mango.convert.Format, 00101 mango.convert.Unicode; 00102 00103 /******************************************************************************* 00104 00105 *******************************************************************************/ 00106 00107 private extern (C) void memmove (void* dst, void* src, uint bytes); 00108 00109 /******************************************************************************* 00110 00111 MutableString is a string class that stores Unicode characters 00112 and provides functionality similar to the Java String class. 00113 00114 Indexes and offsets into and lengths of strings always count 00115 code units, not code points. This is the same as with multi-byte 00116 char* strings in traditional string handling. Operations on strings 00117 typically do not test for code point boundaries. If necessary, the 00118 user needs to take care of such boundaries by testing for the code 00119 unit values 00120 00121 MutableString methods are lenient with regard to input parameter 00122 values. In particular, if any provided indexes are out of bounds 00123 (< 0 or > length) then they are "pinned" to the nearest boundary. 
*******************************************************************************/

class MutableStringTemplate(T) : StringTemplate!(T)
{
        // operator and naming shortcuts
        private alias append                    opCat;
        private alias set                       opIndexAssign;
        private alias MutableStringTemplate     MutableString;

        // helper types specialised on the character type T
        private alias FormatStructTemplate!(T)  Format;
        private alias Unicode.Into!(T)          Into;

        private Into    into;           // unicode converter
        private T[]     scratch;        // formatting scratchpad
        private T[]     converts;       // unicode buffer
        private Format  formatter;      // printf formatter


        /***********************************************************************

                Construct a mutable string whose backing array is sized
                to 'space' elements. The logical length is left at its
                default, so the string starts out empty.

        ***********************************************************************/

        this (uint space = 0)
        {
                this.mutable = true;
                this.content.length = space;
                setup ();
        }

        /***********************************************************************

                Construct a string over the supplied array by handing
                it to setTo(). With 'mutable' true the array is copied;
                with 'mutable' false it is aliased, which defers any
                heap allocation until the content is first modified.

        ***********************************************************************/

        this (T[] content, bool mutable = true)
        {
                // the parameters shadow the members of the same name;
                // setTo() is what updates the members
                setTo (content, mutable);
                setup ();
        }

        /***********************************************************************

                Create a MutableString via the content of a MutableString.
                If said content is immutable (read-only) then you might
                consider setting the 'mutable' parameter to false. Doing
                so will avoid allocating heap-space for the content until
                it is modified via MutableString methods.
***********************************************************************/

        this (MutableString other, bool mutable = true)
        {
                // delegate to the array-based constructor
                this (other.get(), mutable);
        }

        /***********************************************************************

                Construct from the content of a String. Unlike the
                other constructors, 'mutable' defaults to false here,
                i.e. the content is assumed to be immutable.

        ***********************************************************************/

        this (String other, bool mutable = false)
        {
                // delegate to the array-based constructor
                this (other.get(), mutable);
        }

        /***********************************************************************

                Expose the content of this MutableString. This simply
                forwards to get(), so no copy is made here.

        ***********************************************************************/

        T[] aliasOf ()
        {
                T[] view = get ();
                return view;
        }

        /***********************************************************************

                Remove leading and trailing whitespace from this String.
                Note that we slice the content to remove leading space.
***********************************************************************/

        MutableString trim ()
        {
                // Returns MutableString (not the String base type) so
                // that calls can be chained with the other mutators,
                // matching the signature shown in the file header.
                content = utils.trim (get());
                len = content.length;
                return this;
        }

        /***********************************************************************

                Append an integer to this MutableString, using standard
                printf() notation.

                v       the value to append
                format  optional format specification handed to the
                        formatter (null by default)

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString append (int v, T[] format=null)
        {
                // the formatter receives the value by address, along
                // with its size and a type tag
                formatter (format, &v, v.sizeof, Type.Int);
                return this;
        }

        /***********************************************************************

                Append a long to this MutableString, using standard
                printf() notation.

                v       the value to append
                format  optional format specification handed to the
                        formatter (null by default)

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString append (long v, T[] format=null)
        {
                formatter (format, &v, v.sizeof, Type.Long);
                return this;
        }

        /***********************************************************************

                Append a double to this MutableString, using standard
                printf() notation.

                v       the value to append
                format  optional format specification handed to the
                        formatter (null by default)

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString append (double v, T[] format=null)
        {
                formatter (format, &v, v.sizeof, Type.Double);
                return this;
        }

        /***********************************************************************

                Append utf8 text to this MutableString. The text is
                passed through convert(), tagged as Type.Utf8, which
                appends the converted result.

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString append (char[] chars)
        {
                convert (chars, Type.Utf8);
                return this;
        }

        /***********************************************************************

                Append text to this MutableString

        ***********************************************************************/

MutableString append (wchar[] chars)
        {
                // utf16 input: routed through convert(), which appends
                // the converted text
                convert (chars, Type.Utf16);
                return this;
        }

        /***********************************************************************

                Append utf32 text to this MutableString. The text is
                passed through convert(), tagged as Type.Utf32.

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString append (dchar[] chars)
        {
                convert (chars, Type.Utf32);
                return this;
        }

        /***********************************************************************

                Append 'count' copies of a character to this
                MutableString.

                chr     the character to append
                count   how many copies to append; a value of zero or
                        less appends nothing

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString append (T chr, int count=1)
        {
                // A negative count would wrap when converted to uint
                // and corrupt 'len', so treat it as "nothing to do".
                if (count <= 0)
                    return this;

                expand (count);

                uint start = len;
                len += count;
                return set (chr, start, count);
        }

        /***********************************************************************

                Append partial text to this MutableString.

                other   the String to take text from
                start   index of the first element to append
                len     number of elements to append

                start and len are passed through other.pinIndices()
                before the slice is taken.

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString append (String other, uint start=0, uint len=uint.max)
        {
                // note: the 'len' parameter shadows the member here
                other.pinIndices (start, len);
                return append (other.content [start..start+len]);
        }

        /**********************************************************************

                Format a set of arguments using the standard printf()
                formatting notation. The format string and the variadic
                argument information are handed to the formatter.

                Returns this instance, for call chaining.

        **********************************************************************/

        MutableString format (T[] fmt, ...)
        {
                formatter (fmt, _arguments, _argptr);
                return this;
        }

        /***********************************************************************

                Set a section of this MutableString to the specified
                character.

                chr     the character to store
                start   index of the first element to overwrite
                count   number of elements to overwrite

                start and count are passed through pinIndices() first.
                Aliased (non-mutable) content is reallocated before it
                is written to.

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString set (T chr, uint start=0, uint count=1)
        {
                pinIndices (start, count);
                if (! mutable)
                      realloc ();
                content [start..start+count] = chr;
                return this;
        }

        /***********************************************************************

                Set the content to the provided array. Parameter 'mutable'
                specifies whether the given array is likely to change. If
                so, the array is duplicated; if not, it is aliased until
                such time it is altered.

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString setTo (T[] chars, bool mutable = true)
        {
                len = chars.length;
                this.mutable = mutable;
                content = mutable ? chars.dup : chars;
                return this;
        }

        /***********************************************************************

                Replace the content of this MutableString. If the new content
                is immutable (read-only) then you might consider setting the
                'mutable' parameter to false. Doing so will avoid allocating
                heap-space for the content until it is modified via one of
                these methods.

        ***********************************************************************/

        MutableString setTo (String other, bool mutable = true)
        {
                return setTo (other.get(), mutable);
        }

        /***********************************************************************

                Replace the content of this MutableString. If the new content
                is immutable (read-only) then you might consider setting the
                'mutable' parameter to false. Doing so will avoid allocating
                heap-space for the content until it is modified via one of
                these methods.
***********************************************************************/

        MutableString setTo (String other, uint start, uint count, bool mutable = true)
        {
                // start and count go through other.pinIndices() before
                // the slice is taken
                other.pinIndices (start, count);
                return setTo (other.content [start..start+count], mutable);
        }

        /***********************************************************************

                Insert 'count' copies of a character into this
                MutableString.

                chr     the character to insert
                index   position at which to insert
                count   number of copies to insert

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString insert (T chr, uint index=0, uint count=1)
        {
                // Only the position is pinned. pinIndices() is used
                // elsewhere in this class to bound a (start, count)
                // range before slicing existing content, and applying
                // it here would let it alter the number of characters
                // being inserted. This mirrors insert (T[], uint).
                pinIndex (index);
                expand (index, count);
                return set (chr, index, count);
        }

        /***********************************************************************

                Insert text into this MutableString.

                other   the text to insert
                index   position at which to insert

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString insert (T[] other, uint index=0)
        {
                pinIndex (index);
                expand (index, other.length);
                content[index..index+other.length] = other;
                return this;
        }

        /***********************************************************************

                Insert another String into this MutableString

        ***********************************************************************/

        MutableString insert (String other, uint index=0)
        {
                return insert (other.get(), index);
        }

        /***********************************************************************

                Remove a piece of this MutableString.

                start   index of the first element to remove
                length  number of elements to remove (by default, all
                        the way to the end)

                start and length are passed through pinIndices() first.

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString remove (uint start, uint length=uint.max)
        {
                pinIndices (start, length);
                if (length)
                   {
                   if (start >= len)
                       truncate (start);
                   else
                      {
                      if (! mutable)
                            realloc ();

                      // Pointer arithmetic rather than &content[i]:
                      // when the tail is removed, i may equal
                      // content.length, and indexing that element
                      // would be a bounds violation even though no
                      // bytes are moved.
                      uint i = start + length;
                      memmove (content.ptr + start, content.ptr + i, (len-i) * T.sizeof);
                      len -= length;
                      }
                   }
                return this;
        }

        /***********************************************************************

                Truncate the length of this MutableString. A length
                greater than the current one is ignored.

                Returns this instance, for call chaining.

        ***********************************************************************/

        MutableString truncate (uint length=0)
        {
                if (length <= len)
                    len = length;
                return this;
        }

        /**********************************************************************

                Arranges text strings in order, using indices to specify
                where each particular argument should be positioned within
                the text. This is handy for collating I18N components.

                @code
                auto string = new MutableString;

                string.layout ("%2 %1", "one", "two");
                @endcode

                The index numbers range from one through nine. The
                first argument is the layout text itself; a '%' that is
                followed by a digit 1-9 is replaced by the argument of
                that number, and an error is raised via the formatter
                when no such argument was supplied. A '%' followed by
                any other character emits just that character, so "%%"
                produces a single '%'. Calling layout() without any
                arguments appends nothing.

                Returns this instance, for call chaining.

        **********************************************************************/

        MutableString layout (T[][] layout ...)
        {
                // nothing to do without a layout string
                if (layout.length == 0)
                    return this;

                int  args  = layout.length - 1;
                bool state = false;             // true after a '%'

                foreach (T c; layout[0])
                        {
                        if (state)
                           {
                           state = false;

                           // both bounds must hold for c to be a digit
                           if (c >= '1' && c <= '9')
                              {
                              uint index = c - '0';
                              if (index <= args)
                                 {
                                 append (layout[index]);
                                 continue;
                                 }
                              else
                                 formatter.error ("TextLayout : invalid argument");
                              }
                           }
                        else
                           if (c == '%')
                              {
                              state = true;
                              continue;
                              }
                        append (c);
                        }
                return this;
        }

        /***********************************************************************

                Check for available space within the buffer, and expand
                as necessary.
***********************************************************************/

        private final void expand (uint count)
        {
                // aliased content is always reallocated, so that the
                // caller can safely write to it afterwards
                if (!mutable || (len + count) > content.length)
                    realloc (count);
        }

        /***********************************************************************

                Make room available to insert something: ensure there
                is space for 'count' more elements, shift the content
                from 'index' onward up by 'count', and grow the logical
                length accordingly.

        ***********************************************************************/

        private final void expand (uint index, uint count)
        {
                expand (count);

                // Pointer arithmetic rather than &content[...]: when
                // index equals len, the source or destination may sit
                // one past the last element, which must not be indexed
                // even though no bytes are moved.
                memmove (content.ptr + index + count, content.ptr + index, (len - index) * T.sizeof);
                len += count;
        }

        /***********************************************************************

                Allocate memory due to a change in the content. We handle
                the distinction between mutable and immutable here: a
                mutable buffer is resized in place, whereas aliased
                content is copied into a fresh buffer which then becomes
                mutable.

                The new size is the current array length plus 'count',
                rounded up to a multiple of 128 elements.

        ***********************************************************************/

        private final void realloc (uint count = 0)
        {
                uint size = (content.length + count + 127) & ~127;

                if (mutable)
                    content.length = size;
                else
                   {
                   mutable = true;
                   T[] x = content;
                   content = new T[size];

                   // copy the valid part only; the aliased array can
                   // be longer than 'len' (after a truncate, say) and
                   // an array copy needs both sides to match in length
                   if (len)
                       content[0..len] = x[0..len];
                   }
        }

        /***********************************************************************

                Internal method to support MutableString appending:
                copies 'count' elements from 'chars' onto the end of
                the content.

                Returns this instance, for call chaining.

        ***********************************************************************/

        private final MutableString append (T* chars, uint count)
        {
                expand (count);
                content[len..len+count] = chars[0..count];
                len += count;
                return this;
        }

        /***********************************************************************

                Initialize this MutableString.
Allocate conversion buffers
                and prime the formatter

        ***********************************************************************/

        private void setup (Format.DblFormat df = null)
        {
                // working buffers: 256 elements for unicode conversion,
                // 64 for the formatter scratchpad
                converts = new T[256];
                scratch  = new T[64];

                // the formatter is given convert() as a delegate
                formatter.ctor (&convert, null, scratch, df);
        }

        /**********************************************************************

                Support for the formatter: convert the given content
                from the encoding indicated by 'type' and append the
                result to this string.

                Returns the number of elements appended.

        **********************************************************************/

        private uint convert (void[] v, uint type)
        {
                // convert as required, offering our buffer for the output
                T[] text = cast(T[]) into.convert (v, type, converts);

                // a result longer than our buffer is adopted as the
                // new conversion buffer
                if (converts.length < text.length)
                    converts = text;

                // append to string
                uint appended = text.length;
                append (text.ptr, appended);
                return appended;
        }


/+
        // freelist support
        // private MutableString next;
        // private static uint count;
        // private static MutableString freelist;

        /***********************************************************************

                return an instance of MutableString. This should be used
                for high volume instantiation, since it can avoid memory
                allocation.

        ***********************************************************************/

        static final synchronized MutableString create ()
        {
                MutableString s;

                if (freelist)
                   {
                   --count;
                   s = freelist;
                   freelist = s.next;
                   }
                else
                   s = new MutableString;
                return s;
        }

        /***********************************************************************

                return an instance of MutableString. This should be used
                for high volume instantiation, since it can avoid memory
                allocation.
00661 00662 ***********************************************************************/ 00663 00664 private static synchronized void deallocate (MutableString s) 00665 { 00666 if (count < MaxCache) 00667 { 00668 s.len = 0; 00669 s.content = null; 00670 s.next = freelist; 00671 freelist = s; 00672 ++count; 00673 } 00674 } 00675 +/ 00676 } 00677 00678 00679 00680 /******************************************************************************* 00681 00682 Immutable string. 00683 00684 Note that there's a DMD 141 bug whereby the implementation of 00685 ConvertingString somehow gets hidden if you move the copy() 00686 method above the utf() methods! Shoving copy() right at the 00687 end seems to resolve it for now :-( 00688 00689 *******************************************************************************/ 00690 00691 class StringTemplate(T) : UtfString 00692 { 00693 protected alias get opIndex; 00694 protected alias StringTemplate String; 00695 00696 // the core of the String and MutableString attributes. The name 'len' 00697 // is used rather than the more obvious 'length' since there is 00698 // a collision with the silly array[length] syntactic sugar ... 00699 protected uint len; 00700 package T[] content; 00701 00702 protected Unicode.From!(T) from; 00703 protected TextTemplate!(T) utils; 00704 00705 // this should probably be in MutableString only, but there seems to 00706 // be a compiler bug where it doesn't get initialised correctly, 00707 // and it's perhaps useful to have here for when a MutableString is 00708 // passed as a String argument. 
protected bool          mutable;

        /***********************************************************************

                Hidden constructor

        ***********************************************************************/

        private this ()
        {
        }

        /***********************************************************************

                Construct read-only wrapper around the given content.
                The array is referenced rather than copied.

        ***********************************************************************/

        this (T[] content)
        {
                this.content = content;
                this.len = content.length;
        }

        /***********************************************************************

                Return the character at the specified position. An
                index at or beyond the current length is reported via
                error().

        ***********************************************************************/

        final T get (uint index)
        {
                if (index >= len)
                    error ("index out of bounds");
                return content [index];
        }

        /***********************************************************************

                Hash this String. Only the valid part of the content
                (the first 'len' elements) is hashed.

        ***********************************************************************/

        final override uint toHash ()
        {
                return hash (content[0..len]);
        }

        /***********************************************************************

                Return the length of the valid content

        ***********************************************************************/

        final uint length ()
        {
                return len;
        }

        /***********************************************************************

                Is this String equal to another?
***********************************************************************/

        final bool equals (String other)
        {
                // identical instances are trivially equal; otherwise
                // compare against the other's content
                if (other !is this)
                    return equals (other.get());
                return true;
        }

        /***********************************************************************

                Compare this String with the provided text. The two
                are equal when the lengths match and utils.equal()
                reports the elements as equal.

        ***********************************************************************/

        final bool equals (T[] other)
        {
                if (other.length != len)
                    return false;
                return utils.equal (other.ptr, content.ptr, len);
        }

        /***********************************************************************

                Does the content of this String end with the content
                of the other String?

        ***********************************************************************/

        final bool endsWith (String other)
        {
                return endsWith (other.get());
        }

        /***********************************************************************

                Does the content of this String end with the given
                text? Text longer than this String never matches.

        ***********************************************************************/

        final bool endsWith (T[] chars)
        {
                if (chars.length > len)
                    return false;

                // compare against the last chars.length elements
                T[] tail = content [len-chars.length..len];
                return utils.equal (tail.ptr, chars.ptr, chars.length);
        }

        /***********************************************************************

                Does the content of this String start with the content
                of the other String?

        ***********************************************************************/

        final bool startsWith (String other)
        {
                return startsWith (other.get());
        }

        /***********************************************************************

                Does this String start with specified string?
***********************************************************************/

        final bool startsWith (T[] chars)
        {
                // text longer than this String never matches
                if (chars.length > len)
                    return false;
                return utils.equal (content.ptr, chars.ptr, chars.length);
        }

        /***********************************************************************

                Find the first occurrence of a BMP code point in a string.
                A surrogate code point is found only if its match in the
                text is not part of a surrogate pair.

                The search is delegated to utils.indexOf() over the
                valid part of the content, beginning at 'start'.

        ***********************************************************************/

        final uint indexOf (T c, uint start=0)
        {
                T[] valid = content [0..len];
                return utils.indexOf (valid, c, start);
        }

        /***********************************************************************

                Find the first occurrence of a substring in a string.

                The substring is found at code point boundaries. That means
                that if the substring begins with a trail surrogate or ends
                with a lead surrogate, then it is found only if these
                surrogates stand alone in the text. Otherwise, the substring
                edge units would be matched against halves of surrogate pairs.

                This overload searches for the content of another
                String, via the array-based overload.

        ***********************************************************************/

        final uint indexOf (String other, uint start=0)
        {
                return indexOf (other.get(), start);
        }

        /***********************************************************************

                Find the first occurrence of a substring in a string.

                The substring is found at code point boundaries. That means
                that if the substring begins with a trail surrogate or ends
                with a lead surrogate, then it is found only if these
                surrogates stand alone in the text. Otherwise, the substring
                edge units would be matched against halves of surrogate pairs.
***********************************************************************/

        final uint indexOf (T[] chars, uint start=0)
        {
                // search the valid part of the content only
                T[] valid = content [0..len];
                return utils.indexOf (valid, chars, start);
        }

        /***********************************************************************

                Find the last occurrence of a BMP code point in a string.
                A surrogate code point is found only if its match in the
                text is not part of a surrogate pair.

                Not implemented as yet: error() is invoked on entry.

        ***********************************************************************/

        final uint lastIndexOf (T c, uint start=uint.max)
        {
                error ("lastIndexOf() currently unimplemented");
                pinIndex (start);
                return uint.max;        // same value as -1 in a uint
        }

        /***********************************************************************

                Find the last occurrence of the content of another
                String within this one.

                Not implemented as yet: error() is invoked on entry,
                ahead of forwarding to the array-based overload.

        ***********************************************************************/

        final uint lastIndexOf (String other, uint start=uint.max)
        {
                error ("lastIndexOf() currently unimplemented");

                T[] pattern = other.get();
                return lastIndexOf (pattern, start);
        }

        /***********************************************************************

                Find the last occurrence of a substring in a string.

                The substring is found at code point boundaries. That means
                that if the substring begins with a trail surrogate or ends
                with a lead surrogate, then it is found only if these
                surrogates stand alone in the text. Otherwise, the substring
                edge units would be matched against halves of surrogate pairs.
***********************************************************************/

        final uint lastIndexOf (T[] chars, uint start=uint.max)
        {
                // not implemented as yet: error() is invoked on entry
                error ("lastIndexOf() currently unimplemented");
                pinIndex (start);
                return -1;
        }

        /***********************************************************************

                Return content from this String

                A slice of dst is returned, representing a copy of the
                content. The slice is clipped to the minimum of either
                the length of the provided array, or the length of the
                content minus the stipulated start point

                dst     the array to copy into
                start   index of the first element to copy
                length  maximum number of elements to copy

                start and length are passed through pinIndices() first.

        ***********************************************************************/

        final T[] copy (T[] dst, uint start=0, uint length=uint.max)
        {
                pinIndices (start, length);

                // 'length' is a count from here on, as it is at every
                // other pinIndices() call site in this file, so start
                // must not be subtracted from it again
                uint i = length;
                if (i > dst.length)
                    i = dst.length;

                dst [0..i] = content [start..start+i];
                return dst [0..i];
        }
/+
        /***********************************************************************

                Copy a section of this String into a MutableString

        ***********************************************************************/

        MutableStringTemplate!(T) copy (MutableStringTemplate!(T) dst, uint start=0, uint len=uint.max)
        {
                pinIndices (start, len);
                return dst.setTo (content[start..start+len], true);
        }
+/
        /***********************************************************************

                Bind this string to a token: the token is primed with
                the content of this String.

        ***********************************************************************/

        final void bind (TokenTemplate!(T) token)
        {
                token.prime (get());
        }

        /***********************************************************************

                Convert to the AbstractString types. The optional argument
                dst will be resized as required to house the conversion.
To minimize heap allocation, use the following pattern:

                        String  string;

                        wchar[] buffer;
                        wchar[] result = string.utf16 (buffer);

                        if (result.length > buffer.length)
                            buffer = result;

                You can also provide a buffer from the stack, but the output
                will be moved to the heap if said buffer is not large enough

        ***********************************************************************/

        char[] utf8 (char[] dst = null)
        {
                return cast(char[]) from.convert (get(), Type.Utf8, dst);
        }

        wchar[] utf16 (wchar[] dst = null)
        {
                return cast(wchar[]) from.convert (get(), Type.Utf16, dst);
        }

        dchar[] utf32 (dchar[] dst = null)
        {
                return cast(dchar[]) from.convert (get(), Type.Utf32, dst);
        }

        /**********************************************************************

                Iterate over the characters in this string. Note that
                this is a read-only foreach ~ the worst a user can
                do is alter the temporary 'c'

                Iteration stops as soon as the delegate returns a
                non-zero value, which is then returned to the caller.

        **********************************************************************/

        int opApply (int delegate(inout T) dg)
        {
                int result = 0;

                foreach (T c; get())
                         if ((result = dg (c)) != 0)
                              break;
                return result;
        }

        /***********************************************************************

                Compare this String to another, element by element.

                Returns zero when the content is equal, a negative
                value when this String orders before the other, and a
                positive value when it orders after. Where one content
                is a prefix of the other, the shorter one orders first.
                An argument that is not a String compares as unequal
                (a positive value is returned).

        ***********************************************************************/

        final override int opCmp (Object o)
        {
                auto other = cast (String) o;

                // not comparable: report "unequal", as before
                if (other is null)
                    return 1;

                if (other is this)
                    return 0;

                T[] a = get();
                T[] b = other.get();

                uint shared = (a.length < b.length) ? a.length : b.length;
                for (uint i = 0; i < shared; ++i)
                     if (a[i] != b[i])
                         return (a[i] < b[i]) ? -1 : 1;

                // equal over the shared prefix: the shorter sorts first
                if (a.length < b.length)
                    return -1;
                if (a.length > b.length)
                    return 1;
                return 0;
        }

        /***********************************************************************

                Is this String equal to another?
***********************************************************************/

        final override int opEquals (Object o)
        {
                // anything that is not a String can never be equal
                auto that = cast (String) o;

                return (that is null) ? 0 : equals (that);
        }

        /**********************************************************************

                hash() -- hash a variable-length key into a 32-bit value

                  k     : the key (the unaligned variable-length array of bytes)
                  len   : the length of the key, counting by bytes
                  level : can be any 4-byte value

                Returns a 32-bit value.  Every bit of the key affects every bit of
                the return value.  Every 1-bit and 2-bit delta achieves avalanche.

                About 4.3*len + 80 X86 instructions, with excellent pipelining

                The best hash table sizes are powers of 2.  There is no need to do
                mod a prime (mod is sooo slow!).  If you need less than 32 bits,
                use a bitmask.  For example, if you need only 10 bits, do

                            h = (h & hashmask(10));

                In which case, the hash table should have hashsize(10) elements.
                If you are hashing n strings (ub1 **)k, do it like this:

                            for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);

                By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use
                this code any way you wish, private, educational, or commercial.
                It's free.

                See http://burtleburtle.net/bob/hash/evahash.html
                Use for hash table lookup, or anything where one collision in 2^32
                is acceptable. Do NOT use for cryptographic purposes.
**********************************************************************/

        static final uint hash (void[] x, uint c = 0)
        {
                // both running accumulators start from the same constant;
                // 'c' carries the caller-supplied initial value
                uint a = 0x9e3779b9;
                uint b = 0x9e3779b9;

                uint   remaining = x.length;
                ubyte* p = cast(ubyte *) x.ptr;

                // consume the key in 12-byte groups, three 32-bit words each
                for (; remaining >= 12; p += 12, remaining -= 12)
                    {
                    a += *cast(uint *)(p+0);
                    b += *cast(uint *)(p+4);
                    c += *cast(uint *)(p+8);

                    // mixing step: statement order is significant
                    a -= b; a -= c; a ^= (c>>13);
                    b -= c; b -= a; b ^= (a<<8);
                    c -= a; c -= b; c ^= (b>>13);
                    a -= b; a -= c; a ^= (c>>12);
                    b -= c; b -= a; b ^= (a<<16);
                    c -= a; c -= b; c ^= (b>>5);
                    a -= b; a -= c; a ^= (c>>3);
                    b -= c; b -= a; b ^= (a<<10);
                    c -= a; c -= b; c ^= (b>>15);
                    }

                // fold in the total length, then the trailing 0..11 bytes;
                // every case deliberately falls through to the next one
                c += x.length;
                switch (remaining)
                       {
                       case 11: c += (cast(uint) p[10] << 24);
                       case 10: c += (cast(uint) p[9]  << 16);
                       case 9 : c += (cast(uint) p[8]  << 8);
                       case 8 : b += (cast(uint) p[7]  << 24);
                       case 7 : b += (cast(uint) p[6]  << 16);
                       case 6 : b += (cast(uint) p[5]  << 8);
                       case 5 : b += p[4];
                       case 4 : a += (cast(uint) p[3]  << 24);
                       case 3 : a += (cast(uint) p[2]  << 16);
                       case 2 : a += (cast(uint) p[1]  << 8);
                       case 1 : a += p[0];
                       default:
                                break;
                       }

                // final mixing step, identical to the one used in the loop
                a -= b; a -= c; a ^= (c>>13);
                b -= c; b -= a; b ^= (a<<8);
                c -= a; c -= b; c ^= (b>>13);
                a -= b; a -= c; a ^= (c>>12);
                b -= c; b -= a; b ^= (a<<16);
                c -= a; c -= b; c ^= (b>>5);
                a -= b; a -= c; a ^= (c>>3);
                b -= c; b -= a; b ^= (a<<10);
                c -= a; c -= b; c ^= (b>>15);

                return c;
        }

        /***********************************************************************

                Throw an exception

                Always throws a locally declared Exception subclass
                carrying the provided message; never returns normally

        ***********************************************************************/

        package final void error (char[] msg)
        {
                // exception type private to this function
                static class TextException : Exception
                {
                        this (char[] text)
                        {
                                super (text);
                        }
                }

                throw new TextException (msg);
        }

        /***********************************************************************

                Return the valid content from this String

                This is the leading 'len' elements of the backing
                array, returned as a slice (no copy is made)

        ***********************************************************************/

        package final T[] get ()
        {
                return content [0 .. len];
        }

        /***********************************************************************

                Pin the given index to a valid position.

                An index beyond the current length is replaced, in
                place, with the length itself

        ***********************************************************************/

        package final void pinIndex (inout uint x)
        {
                if (len < x)
                    x = len;
        }

        /***********************************************************************

                Pin the given index and length to a valid position.

                start is clipped to the current length first; length
                is then clipped to the number of elements remaining
                after start. Both arguments are updated in place.

        ***********************************************************************/

        package final void pinIndices (inout uint start, inout uint length)
        {
                if (len < start)
                    start = len;

                // elements available from the (now valid) start onward
                uint room = len - start;

                if (room < length)
                    length = room;
        }
}


/*******************************************************************************

        A string that converts to anything

        Declares the three conversions, one per Unicode encoding,
        that every concrete string class must implement

*******************************************************************************/

class UtfString
{
        abstract char[]  utf8  (char[]  dst = null);

        abstract wchar[] utf16 (wchar[] dst = null);

        abstract dchar[] utf32 (dchar[] dst = null);
}



alias StringTemplate!(char)         Utf8String;
alias MutableStringTemplate!(char)  Utf8MutableString;