00001 /******************************************************************************* 00002 00003 @file UChar.d 00004 00005 Copyright (C) 2004 Kris Bell 00006 00007 This software is provided 'as-is', without any express or implied 00008 warranty. In no event will the authors be held liable for damages 00009 of any kind arising from the use of this software. 00010 00011 Permission is hereby granted to anyone to use this software for any 00012 purpose, including commercial applications, and to alter it and/or 00013 redistribute it freely, subject to the following restrictions: 00014 00015 1. The origin of this software must not be misrepresented; you must 00016 not claim that you wrote the original software. If you use this 00017 software in a product, an acknowledgment within documentation of 00018 said product would be appreciated but is not required. 00019 00020 2. Altered source versions must be plainly marked as such, and must 00021 not be misrepresented as being the original software. 00022 00023 3. This notice may not be removed or altered from any distribution 00024 of the source. 00025 00026 00027 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00028 00029 00030 @version Initial version, October 2004 00031 @author Kris 00032 00033 00034 Note that this package and documentation is built around the ICU 00035 project (http://oss.software.ibm.com/icu/). Below is the license 00036 statement as specified by that software: 00037 00038 00039 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00040 00041 00042 ICU License - ICU 1.8.1 and later 00043 00044 COPYRIGHT AND PERMISSION NOTICE 00045 00046 Copyright (c) 1995-2003 International Business Machines Corporation and 00047 others. 00048 00049 All rights reserved. 
00050 00051 Permission is hereby granted, free of charge, to any person obtaining a 00052 copy of this software and associated documentation files (the 00053 "Software"), to deal in the Software without restriction, including 00054 without limitation the rights to use, copy, modify, merge, publish, 00055 distribute, and/or sell copies of the Software, and to permit persons 00056 to whom the Software is furnished to do so, provided that the above 00057 copyright notice(s) and this permission notice appear in all copies of 00058 the Software and that both the above copyright notice(s) and this 00059 permission notice appear in supporting documentation. 00060 00061 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00062 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00063 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 00064 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 00065 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL 00066 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING 00067 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, 00068 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION 00069 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 00070 00071 Except as contained in this notice, the name of a copyright holder 00072 shall not be used in advertising or otherwise to promote the sale, use 00073 or other dealings in this Software without prior written authorization 00074 of the copyright holder. 00075 00076 ---------------------------------------------------------------------- 00077 00078 All trademarks and registered trademarks mentioned herein are the 00079 property of their respective owners. 
*******************************************************************************/

module mango.icu.UChar;

private import mango.icu.ICU;

/*******************************************************************************

        Low-level access to the Unicode Character Database. Besides the
        raw property values, several convenience functions calculate
        derived properties, for example for Java-style programming.

        Unicode gives every code point (not only assigned characters)
        a value for many properties. Most of these are plain boolean
        flags or constants drawn from a small enumerated list; for a
        few properties the values are strings or other, relatively
        more complex, types.

        Further reading: "About the Unicode Character Database"
        (http://www.unicode.org/ucd/) and the ICU User Guide chapter on
        Properties (http://oss.software.ibm.com/icu/userguide/properties.html).

        Many of the functions are designed to match java.lang.Character
        functions. See the documentation of the individual function, and
        the JDK 1.4.1 java.lang.Character documentation at
        http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html

        Some functions exist to ease migration from C/POSIX functions
        such as isblank(). Their use is generally discouraged: the
        C/POSIX standards do not define their semantics beyond the ASCII
        range, so different implementations exhibit very different
        behavior. Unicode properties should be used directly instead.

        The C/POSIX character classes are also few and broad, and they
        tend to be used for conflicting purposes. For example, the
        "isalpha()" class is sometimes used to determine word boundaries,
        while a more sophisticated approach would at least distinguish
        initial letters from continuation characters (the latter
        including combining marks). (In ICU, BreakIterator is the most
        sophisticated API for word boundaries.) Another example: there
        is no "istitle()" class for titlecase characters.

        A summary of the behavior of some C/POSIX character
        classification implementations for Unicode is available at
        http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html

        See <A HREF="http://oss.software.ibm.com/icu/apiref/uchar_8h.html">
        this page</A> for full details.

*******************************************************************************/

class UChar : ICU
{
        /***********************************************************************

                C-linkage function pointers, one per ICU entry point.
                Each is paired with an ICU symbol name in the 'targets'
                table further down.

                NOTE(review): the only code in this class that fills
                these pointers in sits under version (Win32); nothing
                here binds them on other platforms.

        ***********************************************************************/

        public static extern (C)
        {
                /***************************************************************

                        True if the code point has the Alphabetic
                        Unicode property.

                ***************************************************************/

                bool function (dchar c) isUAlphabetic;

                /***************************************************************

                        True if the code point has the Lowercase
                        Unicode property.

                ***************************************************************/

                bool function (dchar c) isULowercase;

                /***************************************************************

                        True if the code point has the Uppercase
                        Unicode property.

                ***************************************************************/

                bool function (dchar c) isUUppercase;

                /***************************************************************

                        True if the code point has the White_Space
                        Unicode property.

                ***************************************************************/

                bool function (dchar c) isUWhiteSpace;

                /***************************************************************

                        True if the code point has the general category
                        "Ll" (lowercase letter).

                ***************************************************************/

                bool function (dchar c) isLower;

                /***************************************************************

                        True if the code point has the general category
                        "Lu" (uppercase letter).

                ***************************************************************/

                bool function (dchar c) isUpper;

                /***************************************************************

                        True if the code point is a titlecase letter.

                ***************************************************************/

                bool function (dchar c) isTitle;

                /***************************************************************

                        True if the code point is a digit character
                        according to Java.

                ***************************************************************/

                bool function (dchar c) isDigit;

                /***************************************************************

                        True if the code point is a letter character.

                ***************************************************************/

                bool function (dchar c) isAlpha;

                /***************************************************************

                        True if the code point is an alphanumeric
                        character (letter or digit) according to Java.

                ***************************************************************/

                bool function (dchar c) isAlphaNumeric;

                /***************************************************************

                        True if the code point is a hexadecimal digit.

                ***************************************************************/

                bool function (dchar c) isHexDigit;

                /***************************************************************

                        True if the code point is a punctuation
                        character.

                ***************************************************************/

                bool function (dchar c) isPunct;

                /***************************************************************

                        True if the code point is a "graphic" character
                        (printable, excluding spaces).

                ***************************************************************/

                bool function (dchar c) isGraph;

                /***************************************************************

                        True if the code point is a "blank" or
                        "horizontal space": a character that visibly
                        separates words on a line.

                ***************************************************************/

                bool function (dchar c) isBlank;

                /***************************************************************

                        True if the code point is "defined", which
                        usually means that it is assigned a character.

                ***************************************************************/

                bool function (dchar c) isDefined;

                /***************************************************************

                        True if the character is a space character.

                ***************************************************************/

                bool function (dchar c) isSpace;

                /***************************************************************

                        True if the code point is a space character
                        according to Java.

                ***************************************************************/

                bool function (dchar c) isJavaSpaceChar;

                /***************************************************************

                        True if the code point is a whitespace
                        character according to Java/ICU.

                ***************************************************************/

                bool function (dchar c) isWhiteSpace;

                /***************************************************************

                        True if the code point is a control character
                        (as defined by this function).

                ***************************************************************/

                bool function (dchar c) isCtrl;

                /***************************************************************

                        True if the code point is an ISO control code.

                ***************************************************************/

                bool function (dchar c) isISOControl;

                /***************************************************************

                        True if the code point is a printable
                        character.

                ***************************************************************/

                bool function (dchar c) isPrint;

                /***************************************************************

                        True if the code point is a base character.

                ***************************************************************/

                bool function (dchar c) isBase;

                /***************************************************************

                        True if the character may begin an identifier
                        according to Unicode (The Unicode Standard,
                        Version 3.0, chapter 5.16 Identifiers).

                ***************************************************************/

                bool function (dchar c) isIDStart;

                /***************************************************************

                        True if the character is permissible in an
                        identifier according to Java.

                ***************************************************************/

                bool function (dchar c) isIDPart;

                /***************************************************************

                        True if the character should be regarded as an
                        ignorable character in an identifier, according
                        to Java.

                ***************************************************************/

                bool function (dchar c) isIDIgnorable;

                /***************************************************************

                        True if the character may begin a Java
                        identifier.

                ***************************************************************/

                bool function (dchar c) isJavaIDStart;

                /***************************************************************

                        True if the character is permissible in a Java
                        identifier.

                ***************************************************************/

                bool function (dchar c) isJavaIDPart;

                /***************************************************************

                        True if the code point has the Bidi_Mirrored
                        property.

                ***************************************************************/

                bool function (dchar c) isMirrored;

                /***************************************************************

                        Decimal digit value of a decimal digit
                        character.

                        NOTE(review): the ICU C header is believed to
                        declare u_charDigitValue as returning a 32-bit
                        int, with -1 meaning "not a decimal digit";
                        confirm how that sentinel surfaces through the
                        ubyte return type declared here.

                ***************************************************************/

                ubyte function (dchar c) charDigitValue;

                /***************************************************************

                        Maps the character to a "mirror-image"
                        character.

                ***************************************************************/

                dchar function (dchar c) charMirror;

                /***************************************************************

                        General category value for the code point.

                ***************************************************************/

                ubyte function (dchar c) charType;

                /***************************************************************

                        Combining class of the code point as specified
                        in UnicodeData.txt.

                ***************************************************************/

                ubyte function (dchar c) getCombiningClass;

                /***************************************************************

                        Lowercase equivalent of the character according
                        to UnicodeData.txt; a character without a
                        lowercase equivalent is returned unchanged.

                ***************************************************************/

                dchar function (dchar c) toLower;

                /***************************************************************

                        Uppercase equivalent of the character according
                        to UnicodeData.txt; a character without an
                        uppercase equivalent is returned unchanged.

                ***************************************************************/

                dchar function (dchar c) toUpper;

                /***************************************************************

                        Titlecase equivalent of the character according
                        to UnicodeData.txt; when none is defined the
                        character is returned unchanged.

                ***************************************************************/

                dchar function (dchar c) toTitle;

                /***************************************************************

                        Case folding equivalent of the character
                        according to UnicodeData.txt and
                        CaseFolding.txt; a character without a case
                        folding equivalent is returned unchanged.

                ***************************************************************/

                dchar function (dchar c, uint options) foldCase;

                /***************************************************************

                        Decimal digit value of the code point in the
                        specified radix.

                        NOTE(review): the ICU C header is believed to
                        declare u_digit as returning a signed 32-bit
                        int, with -1 for "no digit value"; confirm how
                        callers should detect that through the uint
                        return type declared here.

                ***************************************************************/

                uint function (dchar ch, ubyte radix) digit;

                /***************************************************************

                        Character representation of a specific digit
                        in the specified radix.

                ***************************************************************/

                dchar function (uint digit, ubyte radix) forDigit;

                /***************************************************************

                        Numeric value of a Unicode code point as
                        defined in the Unicode Character Database.

                ***************************************************************/

                double function (dchar c) getNumericValue;
        }


        /***********************************************************************

                Bind the ICU functions from a shared library. This is
                complicated by the issues regarding D and DLLs on the
                Windows platform

        ***********************************************************************/

        version (Win32)
        {
                private static
                {
                        // value returned by FunctionLoader.bind; handed
                        // back to FunctionLoader.unbind at shutdown
                        void*   handle;

                        // file name of the ICU library passed to
                        // FunctionLoader.bind
                        char[]  dllName = "icuuc30.dll";
                }

                /***************************************************************

                        Binding table: each entry pairs the address of
                        one of the function pointers declared above
                        with the name of the ICU symbol intended for
                        it. The table is passed to FunctionLoader.bind
                        by the static constructor.

                ***************************************************************/

                static FunctionLoader.Bind[] targets =
                        [
                        {cast(void**) &forDigit,          "u_forDigit"},
                        {cast(void**) &digit,             "u_digit"},
                        {cast(void**) &foldCase,          "u_foldCase"},
                        {cast(void**) &toTitle,           "u_totitle"},
                        {cast(void**) &toUpper,           "u_toupper"},
                        {cast(void**) &toLower,           "u_tolower"},
                        {cast(void**) &charDigitValue,    "u_charDigitValue"},
                        {cast(void**) &getCombiningClass, "u_getCombiningClass"},
                        {cast(void**) &charType,          "u_charType"},
                        {cast(void**) &charMirror,        "u_charMirror"},
                        {cast(void**) &isJavaIDPart,      "u_isJavaIDPart"},
                        {cast(void**) &isJavaIDStart,     "u_isJavaIDStart"},
                        {cast(void**) &isIDIgnorable,     "u_isIDIgnorable"},
                        {cast(void**) &isIDPart,          "u_isIDPart"},
                        {cast(void**) &isIDStart,         "u_isIDStart"},
                        {cast(void**) &isMirrored,        "u_isMirrored"},
                        {cast(void**) &isBase,            "u_isbase"},
                        {cast(void**) &isPrint,           "u_isprint"},
                        {cast(void**) &isISOControl,      "u_isISOControl"},
                        {cast(void**) &isCtrl,            "u_iscntrl"},
                        {cast(void**) &isWhiteSpace,      "u_isWhitespace"},
                        {cast(void**) &isJavaSpaceChar,   "u_isJavaSpaceChar"},
                        {cast(void**) &isSpace,           "u_isspace"},
                        {cast(void**) &isDefined,         "u_isdefined"},
                        {cast(void**) &isBlank,           "u_isblank"},
                        {cast(void**) &isGraph,           "u_isgraph"},
                        {cast(void**) &isPunct,           "u_ispunct"},
                        {cast(void**) &isHexDigit,        "u_isxdigit"},
                        {cast(void**) &isAlpha,           "u_isalpha"},
                        {cast(void**) &isAlphaNumeric,    "u_isalnum"},
                        {cast(void**) &isDigit,           "u_isdigit"},
                        {cast(void**) &isTitle,           "u_istitle"},
                        {cast(void**) &isUpper,           "u_isupper"},
                        {cast(void**) &isLower,           "u_islower"},
                        {cast(void**) &isUAlphabetic,     "u_isUAlphabetic"},
                        {cast(void**) &isUWhiteSpace,     "u_isUWhiteSpace"},
                        {cast(void**) &isUUppercase,      "u_isUUppercase"},
                        {cast(void**) &isULowercase,      "u_isULowercase"},
                        {cast(void**) &getNumericValue,   "u_getNumericValue"},
                        ];

                /***************************************************************

                        Static constructor: passes the library name and
                        the binding table to FunctionLoader.bind and
                        keeps the value it returns.

                ***************************************************************/

                static this ()
                {
                        handle = FunctionLoader.bind (dllName, targets);
                }

                /***************************************************************

                        Static destructor: gives the value obtained in
                        the static constructor back to
                        FunctionLoader.unbind.

                ***************************************************************/

                static ~this ()
                {
                        FunctionLoader.unbind (handle);
                }
        }
}