00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 module mango.io.Utf;
00040
00041 private import mango.io.Exception;
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
class Utf
{
        /***********************************************************************

                Report a conversion failure by throwing an IOException
                that carries the given message.

        ***********************************************************************/

        private static final void error (char[] msg)
        {
                throw new IOException (msg);
        }

        /***********************************************************************

                Encode UTF-16 code units as UTF-8.

                Params:
                input  = the code units to encode. Each unit is encoded on
                         its own; surrogate pairs are not combined.
                output = optional workspace. When it is empty a buffer is
                         allocated here; when it is too small it is
                         resized, which may reallocate it.

                Returns:
                the slice of the (possibly reallocated) output buffer that
                holds the encoded bytes.

                Throws:
                IOException ("invalid wchar") when a surrogate code unit
                (0xD800 - 0xDFFF) is encountered.

        ***********************************************************************/

        static final char[] toUtf8 (wchar[] input, char[] output = null)
        {
                if (! output.length)
                      output = new char [input.length * 2 + 3];

                size_t used = 0;

                foreach (wchar b; input)
                        {
                        // one wchar expands to at most three bytes. The new
                        // size is derived from the real buffer length, so
                        // the bound always matches the allocation and even
                        // a tiny caller-supplied buffer grows
                        if (output.length - used < 3)
                            output.length = output.length + output.length / 2 + 3;

                        if (b < 0x80)
                           {
                           output[used] = cast(char) b;
                           used += 1;
                           }
                        else if (b < 0x0800)
                           {
                           output[used]     = cast(char) (0xc0 | (b >> 6));
                           output[used + 1] = cast(char) (0x80 | (b & 0x3f));
                           used += 2;
                           }
                        else if (b < 0xd800 || b > 0xdfff)
                           {
                           // every BMP value outside the surrogate range,
                           // including 0xE000 - 0xFFFF, takes three bytes
                           output[used]     = cast(char) (0xe0 | (b >> 12));
                           output[used + 1] = cast(char) (0x80 | ((b >> 6) & 0x3f));
                           output[used + 2] = cast(char) (0x80 | (b & 0x3f));
                           used += 3;
                           }
                        else
                           error ("invalid wchar");
                        }

                return output [0..used];
        }

        /***********************************************************************

                Decode UTF-8 into UTF-16 code units.

                Only one, two and three byte sequences are handled, i.e.
                code points within the BMP. Lead and continuation bytes
                are not otherwise validated.

                Params:
                input  = the UTF-8 bytes to decode.
                output = optional workspace. When it is empty a buffer of
                         input.length units is allocated, which is always
                         sufficient. A caller-supplied buffer is never
                         resized.

                Returns:
                the slice of the output buffer that was populated.

                Throws:
                IOException ("invalid utf8") when the input ends in the
                middle of a sequence; IOException ("utf8 overflow") when
                a caller-supplied output buffer is too small; IOException
                when a four byte sequence is met, since its code point
                does not fit in one wchar.

        ***********************************************************************/

        static final wchar[] toUtf16 (char[] input, wchar[] output = null)
        {
                if (! output.length)
                      output = new wchar [input.length];

                size_t consumed = 0;
                size_t produced = 0;

                while (consumed < input.length && produced < output.length)
                      {
                      wchar  b     = cast(wchar) input[consumed];
                      size_t extra = 0;

                      if (b & 0x80)
                         {
                         if (b < 0xe0)
                            {
                            b &= 0x1f;
                            extra = 1;
                            }
                         else if (b < 0xf0)
                            {
                            b &= 0x0f;
                            extra = 2;
                            }
                         else
                            error ("utf8 sequence beyond wchar range");
                         }

                      // check the whole sequence is present before any
                      // continuation byte is read
                      if (input.length - consumed <= extra)
                          error ("invalid utf8");

                      for (size_t i = 1; i <= extra; ++i)
                           b = cast(wchar) ((b << 6) | (input[consumed + i] & 0x3f));

                      output[produced] = b;
                      produced += 1;
                      consumed += extra + 1;
                      }

                // input left over means the output buffer filled up first
                if (consumed < input.length)
                    error ("utf8 overflow");

                return output [0..produced];
        }

        /***********************************************************************

                Encode UTF-32 code points as UTF-8.

                Params:
                input  = the code points to encode.
                output = optional workspace. When it is empty a buffer is
                         allocated here; when it is too small it is
                         resized, which may reallocate it.

                Returns:
                the slice of the (possibly reallocated) output buffer that
                holds the encoded bytes.

                Throws:
                IOException ("invalid dchar") when a value of 0x110000 or
                above is encountered.

        ***********************************************************************/

        static final char[] toUtf8 (dchar[] input, char[] output = null)
        {
                if (! output.length)
                      output = new char [input.length * 2 + 4];

                size_t used = 0;

                foreach (dchar b; input)
                        {
                        // one dchar expands to at most four bytes. The new
                        // size is derived from the real buffer length, so
                        // the bound always matches the allocation
                        if (output.length - used < 4)
                            output.length = output.length + output.length / 2 + 4;

                        if (b < 0x80)
                           {
                           output[used] = cast(char) b;
                           used += 1;
                           }
                        else if (b < 0x0800)
                           {
                           output[used]     = cast(char) (0xc0 | (b >> 6));
                           output[used + 1] = cast(char) (0x80 | (b & 0x3f));
                           used += 2;
                           }
                        else if (b < 0x10000)
                           {
                           // three bytes carry 16 payload bits, so this
                           // form ends at 0xFFFF
                           output[used]     = cast(char) (0xe0 | (b >> 12));
                           output[used + 1] = cast(char) (0x80 | ((b >> 6) & 0x3f));
                           output[used + 2] = cast(char) (0x80 | (b & 0x3f));
                           used += 3;
                           }
                        else if (b < 0x110000)
                           {
                           output[used]     = cast(char) (0xf0 | ((b >> 18) & 0x07));
                           output[used + 1] = cast(char) (0x80 | ((b >> 12) & 0x3f));
                           output[used + 2] = cast(char) (0x80 | ((b >> 6) & 0x3f));
                           output[used + 3] = cast(char) (0x80 | (b & 0x3f));
                           used += 4;
                           }
                        else
                           error ("invalid dchar");
                        }

                return output [0..used];
        }

        /***********************************************************************

                Decode UTF-8 into UTF-32 code points.

                One to four byte sequences are handled; any lead byte of
                0xF0 or above is treated as starting a four byte sequence.
                Lead and continuation bytes are not otherwise validated.

                Params:
                input  = the UTF-8 bytes to decode.
                output = optional workspace. When it is empty a buffer of
                         input.length elements is allocated, which is
                         always sufficient. A caller-supplied buffer is
                         never resized.

                Returns:
                the slice of the output buffer that was populated.

                Throws:
                IOException ("invalid utf8") when the input ends in the
                middle of a sequence; IOException ("utf8 overflow") when
                a caller-supplied output buffer is too small.

        ***********************************************************************/

        static final dchar[] toUtf32 (char[] input, dchar[] output = null)
        {
                if (! output.length)
                      output = new dchar [input.length];

                size_t consumed = 0;
                size_t produced = 0;

                while (consumed < input.length && produced < output.length)
                      {
                      dchar  b     = cast(dchar) input[consumed];
                      size_t extra = 0;

                      if (b & 0x80)
                         {
                         if (b < 0xe0)
                            {
                            b &= 0x1f;
                            extra = 1;
                            }
                         else if (b < 0xf0)
                            {
                            b &= 0x0f;
                            extra = 2;
                            }
                         else
                            {
                            b &= 0x07;
                            extra = 3;
                            }
                         }

                      // check the whole sequence is present before any
                      // continuation byte is read
                      if (input.length - consumed <= extra)
                          error ("invalid utf8");

                      for (size_t i = 1; i <= extra; ++i)
                           b = cast(dchar) ((b << 6) | (input[consumed + i] & 0x3f));

                      output[produced] = b;
                      produced += 1;
                      consumed += extra + 1;
                      }

                // input left over means the output buffer filled up first
                if (consumed < input.length)
                    error ("utf8 overflow");

                return output [0..produced];
        }
}