00001 /******************************************************************************* 00002 00003 @file USet.d 00004 00005 Copyright (c) 2004 Kris Bell 00006 00007 This software is provided 'as-is', without any express or implied 00008 warranty. In no event will the authors be held liable for damages 00009 of any kind arising from the use of this software. 00010 00011 Permission is hereby granted to anyone to use this software for any 00012 purpose, including commercial applications, and to alter it and/or 00013 redistribute it freely, subject to the following restrictions: 00014 00015 1. The origin of this software must not be misrepresented; you must 00016 not claim that you wrote the original software. If you use this 00017 software in a product, an acknowledgment within documentation of 00018 said product would be appreciated but is not required. 00019 00020 2. Altered source versions must be plainly marked as such, and must 00021 not be misrepresented as being the original software. 00022 00023 3. This notice may not be removed or altered from any distribution 00024 of the source. 00025 00026 4. Derivative works are permitted, but they must carry this notice 00027 in full and credit the original source. 00028 00029 00030 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00031 00032 00033 @version Initial version, November 2004 00034 @author Kris 00035 00036 Note that this package and documentation is built around the ICU 00037 project (http://oss.software.ibm.com/icu/). Below is the license 00038 statement as specified by that software: 00039 00040 00041 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00042 00043 00044 ICU License - ICU 1.8.1 and later 00045 00046 COPYRIGHT AND PERMISSION NOTICE 00047 00048 Copyright (c) 1995-2003 International Business Machines Corporation and 00049 others. 00050 00051 All rights reserved. 
00052 00053 Permission is hereby granted, free of charge, to any person obtaining a 00054 copy of this software and associated documentation files (the 00055 "Software"), to deal in the Software without restriction, including 00056 without limitation the rights to use, copy, modify, merge, publish, 00057 distribute, and/or sell copies of the Software, and to permit persons 00058 to whom the Software is furnished to do so, provided that the above 00059 copyright notice(s) and this permission notice appear in all copies of 00060 the Software and that both the above copyright notice(s) and this 00061 permission notice appear in supporting documentation. 00062 00063 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00064 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00065 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 00066 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 00067 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL 00068 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING 00069 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, 00070 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION 00071 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 00072 00073 Except as contained in this notice, the name of a copyright holder 00074 shall not be used in advertising or otherwise to promote the sale, use 00075 or other dealings in this Software without prior written authorization 00076 of the copyright holder. 00077 00078 ---------------------------------------------------------------------- 00079 00080 All trademarks and registered trademarks mentioned herein are the 00081 property of their respective owners. 
00082 00083 *******************************************************************************/ 00084 00085 module mango.icu.USet; 00086 00087 private import mango.icu.ICU, 00088 mango.icu.UString; 00089 00090 /******************************************************************************* 00091 00092 A mutable set of Unicode characters and multicharacter strings. 00093 00094 Objects of this class represent character classes used in regular 00095 expressions. A character specifies a subset of Unicode code points. 00096 Legal code points are U+0000 to U+10FFFF, inclusive. 00097 00098 UnicodeSet supports two APIs. The first is the operand API that 00099 allows the caller to modify the value of a UnicodeSet object. It 00100 conforms to Java 2's java.util.Set interface, although UnicodeSet 00101 does not actually implement that interface. All methods of Set are 00102 supported, with the modification that they take a character range 00103 or single character instead of an Object, and they take a UnicodeSet 00104 instead of a Collection. The operand API may be thought of in terms 00105 of boolean logic: a boolean OR is implemented by add, a boolean AND 00106 is implemented by retain, a boolean XOR is implemented by complement 00107 taking an argument, and a boolean NOT is implemented by complement 00108 with no argument. In terms of traditional set theory function names, 00109 add is a union, retain is an intersection, remove is an asymmetric 00110 difference, and complement with no argument is a set complement with 00111 respect to the superset range MIN_VALUE-MAX_VALUE 00112 00113 The second API is the applyPattern()/toPattern() API from the 00114 java.text.Format-derived classes. Unlike the methods that add 00115 characters, add categories, and control the logic of the set, 00116 the method applyPattern() sets all attributes of a UnicodeSet 00117 at once, based on a string pattern. 
See <A HREF="http://oss.software.ibm.com/icu/apiref/uset_8h.html">
this page</A> for full details.

*******************************************************************************/

class USet : ICU
{
        // Opaque ICU set handle; every method forwards to ICU through it
        package Handle handle;

        // Pattern-parsing options, passed unchanged to ICU as a bitmask.
        // The values correspond to ICU's USET_IGNORE_SPACE (1) and
        // USET_CASE_INSENSITIVE (2)
        enum    Options
                {
                None            = 0,
                IgnoreSpace     = 1,
                CaseInsensitive = 2,
                }


        /***********************************************************************

                Creates a USet object that contains the range of characters
                start..end, inclusive.

                Code points are taken as dchar (32 bits) because the
                underlying ICU functions take UChar32; a 16-bit wchar
                cannot express code points above U+FFFF. Arguments of
                type char or wchar convert implicitly.

        ***********************************************************************/

        this (dchar start, dchar end)
        {
                handle = uset_open (start, end);
        }

        /***********************************************************************

                Creates a set from the given pattern. See the UnicodeSet
                class description for the syntax of the pattern language.

                The options are handed to ICU as given. An error is
                raised via testError() when ICU reports a failure.

        ***********************************************************************/

        this (UText pattern, Options o = Options.None)
        {
                Error e;

                handle = uset_openPatternOptions (pattern.get, pattern.len, o, e);
                testError (e, "failed to open pattern-based charset");
        }

        /***********************************************************************

                Internal constructor invoked via UCollator. Wraps an
                existing ICU handle; note that the destructor of this
                object will close that handle.

        ***********************************************************************/

        package this (Handle handle)
        {
                this.handle = handle;
        }

        /***********************************************************************

                Disposes of the storage used by a USet object

        ***********************************************************************/

        ~this ()
        {
                uset_close (handle);
        }

        /***********************************************************************

                Modifies the set to represent the set specified by the
                given pattern. See the UnicodeSet class description for
                the syntax of the pattern language. See also the User
                Guide chapter about UnicodeSet. Empties the set passed
                before applying the pattern.

                An error is raised via testError() when ICU reports a
                failure.

        ***********************************************************************/

        void applyPattern (UText pattern, Options o = Options.None)
        {
                Error e;

                uset_applyPattern (handle, pattern.get, pattern.len, o, e);
                testError (e, "failed to apply pattern");
        }

        /***********************************************************************

                Returns a string representation of this set. If the result
                of calling this function is passed to a uset_openPattern(),
                it will produce another set that is equal to this one.

                The pattern is written into dst. The escape flag is
                forwarded to ICU as its escapeUnprintable argument.

        ***********************************************************************/

        void toPattern (UString dst, bool escape)
        {
                // callback handed to UString.format(), which supplies the
                // output buffer and its capacity
                uint fmt (wchar* p, uint len, inout Error e)
                {
                        return uset_toPattern (handle, p, len, escape, e);
                }

                dst.format (&fmt, "failed to convert charset to a pattern");
        }

        /***********************************************************************

                Adds the given character to the given USet. After this call,
                contains (c) will return true.

        ***********************************************************************/

        void add (dchar c)
        {
                uset_add (handle, c);
        }

        /***********************************************************************

                Adds all of the elements in the specified set to this set
                if they're not already present. This operation effectively
                modifies this set so that its value is the union of the two
                sets. The behavior of this operation is unspecified if the
                specified collection is modified while the operation is in
                progress.

        ***********************************************************************/

        void addSet (USet other)
        {
                uset_addAll (handle, other.handle);
        }

        /***********************************************************************

                Adds the given range of characters to the given USet. After
                this call, containsRange (start, end) will return true

        ***********************************************************************/

        void addRange (dchar start, dchar end)
        {
                uset_addRange (handle, start, end);
        }

        /***********************************************************************

                Adds the given string to the given USet. After this call,
                containsString (t) will return true

        ***********************************************************************/

        void addString (UText t)
        {
                uset_addString (handle, t.get, t.len);
        }

        /***********************************************************************

                Removes the given character from this USet. After the
                call, contains (c) will return false

        ***********************************************************************/

        void remove (dchar c)
        {
                uset_remove (handle, c);
        }

        /***********************************************************************

                Removes the given range of characters from this USet.
                After the call, containsRange (start, end) will return
                false

        ***********************************************************************/

        void removeRange (dchar start, dchar end)
        {
                uset_removeRange (handle, start, end);
        }

        /***********************************************************************

                Removes the given string from this USet. After the call,
                containsString (t) will return false

        ***********************************************************************/

        void removeString (UText t)
        {
                uset_removeString (handle, t.get, t.len);
        }

        /***********************************************************************

                Inverts this set. This operation modifies this set so
                that its value is its complement. This operation does
                not affect the multicharacter strings, if any

        ***********************************************************************/

        void complement ()
        {
                uset_complement (handle);
        }

        /***********************************************************************

                Removes all of the elements from this set. This set will
                be empty after this call returns.

        ***********************************************************************/

        void clear ()
        {
                uset_clear (handle);
        }

        /***********************************************************************

                Returns true if this USet contains no characters and no
                strings

        ***********************************************************************/

        bool isEmpty ()
        {
                // ICU answers with a byte-sized boolean; normalize to bool
                return cast(bool) (uset_isEmpty (handle) != 0);
        }

        /***********************************************************************

                Returns true if this USet contains the given character

        ***********************************************************************/

        bool contains (dchar c)
        {
                return cast(bool) (uset_contains (handle, c) != 0);
        }

        /***********************************************************************

                Returns true if this USet contains all characters c where
                start <= c && c <= end

        ***********************************************************************/

        bool containsRange (dchar start, dchar end)
        {
                return cast(bool) (uset_containsRange (handle, start, end) != 0);
        }

        /***********************************************************************

                Returns true if this USet contains the given string

        ***********************************************************************/

        bool containsString (UText t)
        {
                return cast(bool) (uset_containsString (handle, t.get, t.len) != 0);
        }

        /***********************************************************************

                Returns the value reported by ICU's uset_size() for this
                set; the ICU documentation describes it as the number of
                characters and strings contained in the set

        ***********************************************************************/

        uint size ()
        {
                return uset_size (handle);
        }


        /***********************************************************************

                Bind the ICU functions from a shared library. This is
                complicated by the issues regarding D and DLLs on the
                Windows platform

        ***********************************************************************/

        private static void* library;

        /***********************************************************************

                Function pointers for the ICU uset_* entry points; they
                are filled in by the static constructor. Code-point
                parameters are dchar (32 bits) so that the signatures
                agree with the UChar32 parameters of the C prototypes.

        ***********************************************************************/

        private static extern (C)
        {
                Handle function (dchar start, dchar end) uset_open;
                void   function (Handle) uset_close;
                Handle function (wchar* pattern, uint patternLength, uint options, inout Error e) uset_openPatternOptions;
                uint   function (Handle, wchar* pattern, uint patternLength, uint options, inout Error e) uset_applyPattern;
                uint   function (Handle, wchar* result, uint resultCapacity, byte escapeUnprintable, inout Error e) uset_toPattern;
                void   function (Handle, dchar c) uset_add;
                void   function (Handle, Handle additionalSet) uset_addAll;
                void   function (Handle, dchar start, dchar end) uset_addRange;
                void   function (Handle, wchar* str, uint strLen) uset_addString;
                void   function (Handle, dchar c) uset_remove;
                void   function (Handle, dchar start, dchar end) uset_removeRange;
                void   function (Handle, wchar* str, uint strLen) uset_removeString;
                void   function (Handle) uset_complement;
                void   function (Handle) uset_clear;
                byte   function (Handle) uset_isEmpty;
                byte   function (Handle, dchar c) uset_contains;
                byte   function (Handle, dchar start, dchar end) uset_containsRange;
                byte   function (Handle, wchar* str, uint strLen) uset_containsString;
                uint   function (Handle) uset_size;
        }

        /***********************************************************************

                Table pairing each function pointer above with the name
                of the symbol to be resolved for it

        ***********************************************************************/

        static  FunctionLoader.Bind[] targets =
                [
                {cast(void**) &uset_open,               "uset_open"},
                {cast(void**) &uset_close,              "uset_close"},
                {cast(void**) &uset_openPatternOptions, "uset_openPatternOptions"},
                {cast(void**) &uset_applyPattern,       "uset_applyPattern"},
                {cast(void**) &uset_toPattern,          "uset_toPattern"},
                {cast(void**) &uset_add,                "uset_add"},
                {cast(void**) &uset_addAll,             "uset_addAll"},
                {cast(void**) &uset_addRange,           "uset_addRange"},
                {cast(void**) &uset_addString,          "uset_addString"},
                {cast(void**) &uset_remove,             "uset_remove"},
                {cast(void**) &uset_removeRange,        "uset_removeRange"},
                {cast(void**) &uset_removeString,       "uset_removeString"},
                {cast(void**) &uset_complement,         "uset_complement"},
                {cast(void**) &uset_clear,              "uset_clear"},
                {cast(void**) &uset_isEmpty,            "uset_isEmpty"},
                {cast(void**) &uset_contains,           "uset_contains"},
                {cast(void**) &uset_containsRange,      "uset_containsRange"},
                {cast(void**) &uset_containsString,     "uset_containsString"},
                {cast(void**) &uset_size,               "uset_size"},
                ];

        /***********************************************************************

                Binds the targets table against the icuuc library at
                module startup, and retains the value returned by the
                loader so that it can be released again

        ***********************************************************************/

        static this ()
        {
                library = FunctionLoader.bind (icuuc, targets);
        }

        /***********************************************************************

                Hands the value obtained at startup back to the loader

        ***********************************************************************/

        static ~this ()
        {
                FunctionLoader.unbind (library);
        }
}