/*******************************************************************************

        @file AnonymousString.d

        Copyright (c) 2004 Kris Bell

        This software is provided 'as-is', without any express or implied
        warranty. In no event will the authors be held liable for damages
        of any kind arising from the use of this software.

        Permission is hereby granted to anyone to use this software for any
        purpose, including commercial applications, and to alter it and/or
        redistribute it freely, subject to the following restrictions:

        1. The origin of this software must not be misrepresented; you must
           not claim that you wrote the original software. If you use this
           software in a product, an acknowledgment within documentation of
           said product would be appreciated but is not required.

        2. Altered source versions must be plainly marked as such, and must
           not be misrepresented as being the original software.

        3. This notice may not be removed or altered from any distribution
           of the source.

        4. Derivative works are permitted, but they must carry this notice
           in full and credit the original source.


                        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


        class AnonymousString : UniString
        {
                // reset content (each returns this, so calls can be chained)
                AnonymousString set (char[] content);
                AnonymousString set (wchar[] content);
                AnonymousString set (dchar[] content);
        }

        class UniString
        {
                // convert content
                abstract char[]  utf8  (char[]  dst = null);
                abstract wchar[] utf16 (wchar[] dst = null);
                abstract dchar[] utf32 (dchar[] dst = null);
        }


        @version        Initial version, December 2005

        @author         Kris

*******************************************************************************/

module mango.text.AnonymousString;

private import  mango.convert.Type,
                mango.convert.Unicode;

private import  mango.text.model.UniString;


/*******************************************************************************

        Anonymous string can be used to pass encoding-agnostic content
        across contract boundaries.

        The instance does not copy what it is given: set() merely records
        a reference to the caller's array along with its encoding, and the
        utf8/utf16/utf32 accessors convert on demand.

*******************************************************************************/

class AnonymousString : UniString
{
        // encoding of 'content', as one of the Type.UtfX values
        private uint    type;

        // true while convert() is working with the internal scratch buffer
        private bool    local;

        // internal scratch buffer, used when the caller supplies no dst.
        // Its length is kept a multiple of dchar.sizeof (see update)
        private void[]  buffer;

        // the (un-copied) content handed to set()
        private void[]  content;

        /***********************************************************************

                Construct an AnonymousString ~ set up conversion buffer

        ***********************************************************************/

        this ()
        {
                buffer = new byte [256];
        }

        /***********************************************************************

                Set utf8 content. Returns this instance, for chaining

        ***********************************************************************/

        AnonymousString set (char[] content)
        {
                return set (content, Type.Utf8);
        }

        /***********************************************************************

                Set utf16 content. Returns this instance, for chaining

        ***********************************************************************/

        AnonymousString set (wchar[] content)
        {
                return set (content, Type.Utf16);
        }

        /***********************************************************************

                Set utf32 content. Returns this instance, for chaining

        ***********************************************************************/

        AnonymousString set (dchar[] content)
        {
                return set (content, Type.Utf32);
        }

        /***********************************************************************

                Set content: record the array reference (no copy is
                made) together with its encoding type

        ***********************************************************************/

        private AnonymousString set (void[] content, uint type)
        {
                this.content = content;
                this.type = type;
                return this;
        }

        /***********************************************************************

                Convert to the UniString types. The optional argument
                dst is handed to the Unicode converter as the output
                buffer. To minimize heap allocation, use the following
                pattern:

                        AnonymousString string;

                        wchar[] buffer;
                        wchar[] result = string.utf16 (buffer);

                        if (result.length > buffer.length)
                            buffer = result;

                You can also provide a buffer from the stack, but the output
                will be moved to the heap if said buffer is not large enough

                When dst is null an internal scratch buffer is used in
                its place, so the result should be consumed (or copied)
                before the next conversion on this instance. When the
                requested encoding matches that of the content, the
                content array given to set() is returned as is.

        ***********************************************************************/

        char[] utf8 (char[] dst = null)
        {
                return cast(char[]) convert (dst, Type.Utf8, null);
        }

        wchar[] utf16 (wchar[] dst = null)
        {
                return cast(wchar[]) convert (dst, Type.Utf16, null);
        }

        dchar[] utf32 (dchar[] dst = null)
        {
                return cast(dchar[]) convert (dst, Type.Utf32, null);
        }

        /***********************************************************************

                Convert the current content to the encoding identified
                by dstType, which is one of Type.Utf8, Type.Utf16 or
                Type.Utf32.

                dst is the output buffer passed to the Unicode converter;
                when null, the internal scratch buffer is used instead.
                A non-null dst is cast to an array of the destination
                character type, so its byte length must be a multiple
                of that character size.

                ate is forwarded untouched to the Unicode converter. It
                is not written at all when source and destination
                encodings are the same, since no converter is invoked
                for that case.

                Returns the converted text, or the original content
                array when no conversion is required.

        ***********************************************************************/

        void[] convert (void[] dst, uint dstType, uint* ate)
        {
                enum {char2char, char2wchar, char2dchar,
                      wchar2char, wchar2wchar, wchar2dchar,
                      dchar2char, dchar2wchar, dchar2dchar};

                // dispatch table, indexed by [source][destination] encoding
                const int[][3] router = [
                                        [char2char,  char2wchar,  char2dchar],
                                        [wchar2char, wchar2wchar, wchar2dchar],
                                        [dchar2char, dchar2wchar, dchar2dchar],
                                        ];

                // rebase both encodings to 0..2. This relies upon Type.Utf8,
                // Type.Utf16 and Type.Utf32 being consecutive values, which
                // the asserts check in non-release builds
                uint srcType = type;
                srcType -= Type.Utf8;
                dstType -= Type.Utf8;
                assert (srcType < 3);
                assert (dstType < 3);

                // fall back on the scratch buffer where no dst was given;
                // update() uses 'local' to decide whether to adopt the result
                local = false;
                if (dst is null)
                   {
                   local = true;
                   dst = buffer;
                   }

                // NOTE(review): the same-encoding cases return early without
                // setting *ate ~ confirm that callers of convert() passing a
                // non-null ate do not depend on it for the pass-through path
                switch (router[srcType][dstType])
                       {
                       case char2char:
                            return content;

                       case char2wchar:
                            return update (Unicode.toUtf16 (cast(char[]) content, cast(wchar[]) dst, ate));

                       case char2dchar:
                            return update (Unicode.toUtf32 (cast(char[]) content, cast(dchar[]) dst, ate));


                       case wchar2char:
                            return update (Unicode.toUtf8 (cast(wchar[]) content, cast(char[]) dst, ate));

                       case wchar2wchar:
                            return content;

                       case wchar2dchar:
                            return update (Unicode.toUtf32 (cast(wchar[]) content, cast(dchar[]) dst, ate));


                       case dchar2char:
                            return update (Unicode.toUtf8 (cast(dchar[]) content, cast(char[]) dst, ate));

                       case dchar2wchar:
                            return update (Unicode.toUtf16 (cast(dchar[]) content, cast(wchar[]) dst, ate));

                       case dchar2dchar:
                            return content;

                       default:
                            break;
                       }
                return null;
        }

        /***********************************************************************

                Post-process a conversion result: where the internal
                scratch buffer was in use and the converter produced
                something larger, adopt the result as the new scratch
                buffer so later conversions have more room.

                Only a prefix holding a whole number of dchar-sized
                units is adopted. The scratch buffer is subsequently
                cast to char[], wchar[] or dchar[] within convert(),
                and an array cast fails at runtime when the byte length
                is not a multiple of the target element size (e.g. a
                129 wchar result, 258 bytes, cannot be cast to dchar[]).

                Returns ret, unmodified

        ***********************************************************************/

        private void[] update (void[] ret)
        {
                if (local)
                   {
                   // void[] lengths are in bytes; round down to a
                   // multiple of the widest character type
                   size_t usable = ret.length - (ret.length % dchar.sizeof);

                   if (usable > buffer.length)
                       buffer = ret [0 .. usable];
                   }
                return ret;
        }
}