00001 /******************************************************************************* 00002 00003 @file UDomainName.d 00004 00005 Copyright (c) 2004 Kris Bell 00006 00007 This software is provided 'as-is', without any express or implied 00008 warranty. In no event will the authors be held liable for damages 00009 of any kind arising from the use of this software. 00010 00011 Permission is hereby granted to anyone to use this software for any 00012 purpose, including commercial applications, and to alter it and/or 00013 redistribute it freely, subject to the following restrictions: 00014 00015 1. The origin of this software must not be misrepresented; you must 00016 not claim that you wrote the original software. If you use this 00017 software in a product, an acknowledgment within documentation of 00018 said product would be appreciated but is not required. 00019 00020 2. Altered source versions must be plainly marked as such, and must 00021 not be misrepresented as being the original software. 00022 00023 3. This notice may not be removed or altered from any distribution 00024 of the source. 00025 00026 4. Derivative works are permitted, but they must carry this notice 00027 in full and credit the original source. 00028 00029 00030 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00031 00032 00033 @version Initial version, November 2004 00034 @author Kris 00035 00036 Note that this package and documentation is built around the ICU 00037 project (http://oss.software.ibm.com/icu/). Below is the license 00038 statement as specified by that software: 00039 00040 00041 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 00042 00043 00044 ICU License - ICU 1.8.1 and later 00045 00046 COPYRIGHT AND PERMISSION NOTICE 00047 00048 Copyright (c) 1995-2003 International Business Machines Corporation and 00049 others. 00050 00051 All rights reserved. 
00052 00053 Permission is hereby granted, free of charge, to any person obtaining a 00054 copy of this software and associated documentation files (the 00055 "Software"), to deal in the Software without restriction, including 00056 without limitation the rights to use, copy, modify, merge, publish, 00057 distribute, and/or sell copies of the Software, and to permit persons 00058 to whom the Software is furnished to do so, provided that the above 00059 copyright notice(s) and this permission notice appear in all copies of 00060 the Software and that both the above copyright notice(s) and this 00061 permission notice appear in supporting documentation. 00062 00063 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00064 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00065 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 00066 OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 00067 HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL 00068 INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING 00069 FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, 00070 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION 00071 WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 00072 00073 Except as contained in this notice, the name of a copyright holder 00074 shall not be used in advertising or otherwise to promote the sale, use 00075 or other dealings in this Software without prior written authorization 00076 of the copyright holder. 00077 00078 ---------------------------------------------------------------------- 00079 00080 All trademarks and registered trademarks mentioned herein are the 00081 property of their respective owners. 
00082 00083 *******************************************************************************/ 00084 00085 module mango.icu.UDomainName; 00086 00087 private import mango.icu.ICU, 00088 mango.icu.UString; 00089 00090 /******************************************************************************* 00091 00092 UIDNA API implements the IDNA protocol as defined in the 00093 IDNA RFC (http://www.ietf.org/rfc/rfc3490.txt). 00094 00095 The RFC defines 2 operations: toAscii and toUnicode. Domain 00096 labels containing non-ASCII code points are required to be 00097 processed by toAscii operation before passing it to resolver 00098 libraries. Domain names that are obtained from resolver 00099 libraries are required to be processed by toUnicode operation 00100 before displaying the domain name to the user. IDNA requires 00101 that implementations process input strings with Nameprep 00102 (http://www.ietf.org/rfc/rfc3491.txt), which is a profile of 00103 Stringprep (http://www.ietf.org/rfc/rfc3454.txt), and then with 00104 Punycode (http://www.ietf.org/rfc/rfc3492.txt). Implementations 00105 of IDNA MUST fully implement Nameprep and Punycode; neither 00106 Nameprep nor Punycode are optional. 00107 00108 The input and output of toAscii() and ToUnicode() operations are 00109 Unicode and are designed to be chainable, i.e., applying toAscii() 00110 or toUnicode() operations multiple times to an input string will 00111 yield the same result as applying the operation once. 00112 00113 See <A HREF="http://oss.software.ibm.com/icu/apiref/uidna_8h.html"> 00114 this page</A> for full details. 
*******************************************************************************/

class UDomainName : ICU
{
        // the domain name (or single label) that every operation reads from
        private UText text;

        /***********************************************************************

                Processing options. The chosen value is passed unchanged
                as the 'options' argument of the bound uidna_* functions,
                so the numeric values are pinned explicitly instead of
                relying on declaration order. They correspond to the ICU
                constants UIDNA_DEFAULT (0), UIDNA_ALLOW_UNASSIGNED (1)
                and UIDNA_USE_STD3_RULES (2).

        ***********************************************************************/

        enum Options
        {
                Strict  = 0,
                Lenient = 1,
                Std3    = 2
        }

        /***********************************************************************

                Construct a UDomainName over the provided text. The
                text is retained by reference (not copied) and is used
                as the source for all subsequent operations.

        ***********************************************************************/

        this (UText text)
        {
                this.text = text;
        }

        /***********************************************************************

                This function implements the ToASCII operation as
                defined in the IDNA RFC.

                This operation is done on single labels before sending
                them to something that expects ASCII names. A label is
                an individual part of a domain name. Labels are usually
                separated by dots; e.g. "www.example.com" is composed
                of 3 labels: "www", "example", and "com".

                The output is produced through dst.format(), which is
                handed the conversion callback along with the message
                to report on failure.

        ***********************************************************************/

        void toAscii (UString dst, Options o = Options.Strict)
        {
                uint fmt (wchar* p, uint len, inout Error e)
                {
                        // no parse-error structure is requested (null)
                        return uidna_toASCII (text.get, text.len, p, len, o, null, e);
                }

                dst.format (&fmt, "failed to convert IDN to ASCII");
        }

        /***********************************************************************

                This function implements the ToUnicode operation as
                defined in the IDNA RFC.

                This operation is done on single labels before sending
                them to something that expects Unicode names. A label
                is an individual part of a domain name. Labels are
                usually separated by dots; e.g. "www.example.com" is
                composed of 3 labels: "www", "example", and "com".

                The output is produced through dst.format(), which is
                handed the conversion callback along with the message
                to report on failure.

        ***********************************************************************/

        void toUnicode (UString dst, Options o = Options.Strict)
        {
                uint fmt (wchar* p, uint len, inout Error e)
                {
                        // no parse-error structure is requested (null)
                        return uidna_toUnicode (text.get, text.len, p, len, o, null, e);
                }

                dst.format (&fmt, "failed to convert IDN to Unicode");
        }

        /***********************************************************************

                Convenience function that implements the IDNToASCII
                operation as defined in the IDNA RFC.

                This operation is done on complete domain names, e.g.
                "www.example.com". It is important to note that this
                operation can fail. If it fails, then the input domain
                name cannot be used as an Internationalized Domain Name
                and the application should have methods defined to deal
                with the failure.

                Note: IDNA RFC specifies that a conformant application
                should divide a domain name into separate labels, decide
                whether to apply allowUnassigned and useSTD3ASCIIRules
                on each, and then convert. This function does not offer
                that level of granularity. The options once set will
                apply to all labels in the domain name.

        ***********************************************************************/

        void IdnToAscii (UString dst, Options o = Options.Strict)
        {
                uint fmt (wchar* p, uint len, inout Error e)
                {
                        // no parse-error structure is requested (null)
                        return uidna_IDNToASCII (text.get, text.len, p, len, o, null, e);
                }

                dst.format (&fmt, "failed to convert IDN to ASCII");
        }

        /***********************************************************************

                Convenience function that implements the IDNToUnicode
                operation as defined in the IDNA RFC.

                This operation is done on complete domain names, e.g.
                "www.example.com".

                Note: IDNA RFC specifies that a conformant application
                should divide a domain name into separate labels, decide
                whether to apply allowUnassigned and useSTD3ASCIIRules
                on each, and then convert. This function does not offer
                that level of granularity. The options once set will
                apply to all labels in the domain name.

        ***********************************************************************/

        void IdnToUnicode (UString dst, Options o = Options.Strict)
        {
                uint fmt (wchar* p, uint len, inout Error e)
                {
                        // no parse-error structure is requested (null)
                        return uidna_IDNToUnicode (text.get, text.len, p, len, o, null, e);
                }

                dst.format (&fmt, "failed to convert IDN to Unicode");
        }

        /***********************************************************************

                Compare two IDN strings for equivalence.

                This function splits the domain names into labels and
                compares them. According to the IDN RFC, whenever two
                labels are compared, they are considered equal if and
                only if their ASCII forms (obtained by applying toASCII)
                match using a case-insensitive ASCII comparison. Two
                domain names are considered a match if and only if all
                labels match, regardless of whether label separators
                match.

                Returns the value produced by uidna_compare() for this
                text against 'other' (per the ICU documentation: zero
                when equivalent, otherwise negative or positive). The
                error status is handed to testError() together with the
                failure message before the result is returned.

        ***********************************************************************/

        int compare (UString other, Options o = Options.Strict)
        {
                Error e;

                int i = uidna_compare (text.get, text.len, other.get, other.len, o, e);
                testError (e, "failed to compare IDN strings");
                return i;
        }


        /***********************************************************************

                Bind the ICU functions from a shared library. This is
                complicated by the issues regarding D and DLLs on the
                Windows platform.

        ***********************************************************************/

        private static void* library;

        /***********************************************************************

                Pointers to the ICU entry points. They are filled in by
                the static constructor via the 'targets' table below.

        ***********************************************************************/

        private static extern (C)
        {
                uint function (wchar*, uint, wchar*, uint, uint, void*, inout Error) uidna_toASCII;
                uint function (wchar*, uint, wchar*, uint, uint, void*, inout Error) uidna_toUnicode;
                uint function (wchar*, uint, wchar*, uint, uint, void*, inout Error) uidna_IDNToASCII;
                uint function (wchar*, uint, wchar*, uint, uint, void*, inout Error) uidna_IDNToUnicode;
                int  function (wchar*, uint, wchar*, uint, uint, inout Error) uidna_compare;
        }

        /***********************************************************************

                Binding table: pairs the address of each function
                pointer above with the symbol name to look up.

        ***********************************************************************/

        static  FunctionLoader.Bind[] targets =
                [
                {cast(void**) &uidna_toASCII,      "uidna_toASCII"},
                {cast(void**) &uidna_toUnicode,    "uidna_toUnicode"},
                {cast(void**) &uidna_IDNToASCII,   "uidna_IDNToASCII"},
                {cast(void**) &uidna_IDNToUnicode, "uidna_IDNToUnicode"},
                {cast(void**) &uidna_compare,      "uidna_compare"},
                ];

        /***********************************************************************

                Bind the 'targets' table against the icuuc library at
                module startup, retaining the value returned by
                FunctionLoader.bind() for the static destructor.

        ***********************************************************************/

        static this ()
        {
                library = FunctionLoader.bind (icuuc, targets);
        }

        /***********************************************************************

                Hand the retained library value back to
                FunctionLoader.unbind() at module shutdown.

        ***********************************************************************/

        static ~this ()
        {
                FunctionLoader.unbind (library);
        }
}