Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

text/Text.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Util.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032         
00033         @version        Initial version, April 2004      
00034         @author         Kris
00035 
00036 
00037 *******************************************************************************/
00038 
00039 module mango.text.Text;
00040 
00041 /******************************************************************************
00042 
00043         Placeholder for a variety of wee functions. Some of these are
00044         handy for Java programmers, but the primary reason for their
00045         existence is that they don't allocate memory ~ processing is 
00046         performed in-place.
00047 
00048 ******************************************************************************/
00049 
00050 struct TextTemplate(T)
00051 {
00052         static if (!is (T == char) && !is (T == wchar) && !is (T == dchar)) 
00053                     pragma (msg, "Template type must be char, wchar, or dchar");
00054 
00055 
00056         /**********************************************************************
00057                 Replace all instances of one char with another (in place) and
00058                 return 'source'. Each search resumes one element past the last
00059                 hit, so the call terminates even when replacement == match
00060         **********************************************************************/
00061 
00062         final static T[] replace (T[] source, T match, T replacement)
00063         {
00064                 T*  p;
00065                 T*  scan = source.ptr;
00066                 int remaining = source.length;
00067                 // an exhausted (or empty) range is never handed to locate()
00068                 while (remaining > 0 && (p = locate (scan, match, remaining)) != null)
00069                       {
00070                       *p = replacement;
00071                       remaining -= (p - scan) + 1;
00072                       scan = p + 1;
00073                       }
00074                 return source;
00075         }
00076 
00077         /**********************************************************************
00078                 Return the index of the first instance of 'match' at or after
00079                 position 'start', or -1 when there is none. A 'start' at or
00080                 beyond the end of 'source' also yields -1
00081                 
00082         **********************************************************************/
00083 
00084         final static int indexOf (T[] source, T match, int start=0)
00085         {
00086                 if (start >= source.length)
00087                     return -1;
00088 
00089                 T* hit = locate (&source[start], match, source.length - start);
00090                 if (hit is null)
00091                     return -1;
00092                 return hit - source.ptr;
00093         }
00094 
00095         /**********************************************************************
00096                 Return the index of the first instance of the pattern 'match'
00097                 at or after position 'start', or -1 when there is none, when
00098                 'match' is empty or when 'start' is negative. The search stops
00099                 as soon as the first pattern char cannot be found any more
00100         **********************************************************************/
00101 
00102         final static int indexOf (T[] source, T[] match, int start=0)
00103         {
00104                 int length = match.length;
00105                 int extent = source.length - length + 1;
00106                 if (length && start >= 0)
00107                     while (start < extent)
00108                           {
00109                           T* p = locate (source.ptr+start, match[0], extent-start);
00110                           if (p is null)
00111                               break;      // first char is absent from the rest
00112                           if (equal (p, match.ptr, length))
00113                               return p - source.ptr;
00114                           start = (p - source.ptr) + 1;
00115                           }
00116                 return -1;
00117         }
00118 
00119         /**********************************************************************
00120                 Return the index of the last instance of 'match' that lies
00121                 before position 'start' (exclusive), or -1 when there is none.
00122                 By default, and for any 'start' beyond the end, the whole of
00123                 'source' is searched
00124         **********************************************************************/
00125 
00126         final static int rIndexOf (T[] source, T match, int start=int.max)
00127         {
00128                 int limit = source.length;
00129                 if (start > limit)
00130                     start = limit;      // int.max default, or any start past the end
00131 
00132                 for (int i=start; i-- > 0;)
00133                      if (source[i] is match)
00134                          return i;
00135                 return -1;
00136         }
00137 
00138         /**********************************************************************
00139                 Return the index of the last instance of the pattern 'match'
00140                 that ends at or before position 'start', or -1 when there is
00141                 none or 'match' is empty. By default, and for any 'start'
00142                 beyond the end, the whole of 'source' is searched
00143         **********************************************************************/
00144 
00145         final static int rIndexOf (T[] source, T[] match, int start=int.max)
00146         {
00147                 int length = match.length;
00148                 if (length is 0)
00149                     return -1;              // empty pattern: same answer as indexOf
00150                 if (start > cast(int) source.length)
00151                     start = source.length;  // int.max default, or start past the end
00152 
00153                 // the char overload only looks below its 'start', so hand it one
00154                 // past the last index at which 'match' still fits before 'start'
00155                 start = start - length + 1;
00156                 while (start > 0)
00157                       {
00158                       int found = rIndexOf (source, match[0], start);
00159                       if (found < 0)
00160                           break;
00161                       if (equal (match.ptr, source.ptr + found, length))
00162                           return found;
00163                       start = found;
00164                       }
00165                 return -1;
00166         }
00167 
00168         /**********************************************************************
00169                 True when 'c' is a space, tab, carriage return or linefeed
00170         **********************************************************************/
00171 
00172         final static bool isSpace (T c)
00173         {
00174                 if (c is ' ' || c is '\t')
00175                     return true;
00176                 return cast(bool) (c is '\r' || c is '\n');
00177         }
00178 
00179         /**********************************************************************
00180                 Strip whitespace (as defined by isSpace) from both ends of
00181                 'source'. The result is a slice of the original content, so
00182                 nothing is copied; all-whitespace input gives an empty slice
00183         **********************************************************************/
00184 
00185         final static T[] trim (T[] source)
00186         {
00187                 int head = 0;
00188                 int tail = source.length;
00189 
00190                 // step over leading whitespace
00191                 for (; head < tail; ++head)
00192                        if (! isSpace (source[head]))
00193                              break;
00194 
00195                 // step back over trailing whitespace
00196                 while (tail > head && isSpace (source[tail-1]))
00197                        --tail;
00198 
00199                 return source [head .. tail];
00200         }
00201 
00202         /**********************************************************************
00203                 Split 'src' around each instance of the pattern 'delim' and
00204                 return the pieces, each one a slice of 'src'. Empty pieces are
00205                 kept, except for an empty piece at the very end of 'src'
00206         **********************************************************************/
00207 
00208         final static T[][] split (T[] src, T[] delim)
00209         {
00210                 T[][]   pieces;
00211                 int     from = 0;
00212 
00213                 assert (delim.length);
00214                 for (int hit = indexOf (src, delim, 0); hit >= 0;
00215                          hit = indexOf (src, delim, from))
00216                     {
00217                     pieces ~= src [from .. hit];
00218                     from = hit + delim.length;
00219                     }
00220                 // whatever follows the last delimiter is the final piece, unless empty
00221                 if (from < src.length)
00222                     pieces ~= src [from .. src.length];
00223                 return pieces;
00224         }
00225 
00226         /**********************************************************************
00227                 locate() & equal(): inline asm per char width on X86, loops elsewhere
00228         **********************************************************************/
00229 
00230         version (X86)
00231         {
00232                 static if (is(T == char))
00233                 {
00234                         static char* locate (char* s, char match, int length)
00235                         {       // address of the first 'match' among the 'length' chars at s, else null
00236                                 asm 
00237                                 {
00238                                 mov   EDI, s;
00239                                 mov   ECX, length; 
00240                                 movzx EAX, match;       // AL = char to scan for
00241                                 test  ECX, ECX;         // a repeat count of 0 compares nothing and
00242                                 jle   none;             // would leave a stale ZF for the jz below
00243                                 cld;                    // scan upwards through memory
00244                                 repnz;                  // repeat until a hit or ECX reaches 0
00245                                 scasb;
00246                                 lea   EAX, [EDI-1];     // EDI is one char past the hit; lea keeps ZF
00247                                 jz    done;
00248                         none:
00249                                 xor   EAX, EAX;         // not found (or nothing to scan): null
00250                         done:;
00251                                 }
00252                         }
00253 
00254                         static bool equal (char* s, char* d, int length)
00255                         {       // true when the 'length' chars at s and at d are identical
00256                                 asm                     // no D return statement; presumably EAX carries the result (confirm vs D ABI)
00257                                 {
00258                                 mov   EDI, s;
00259                                 mov   ESI, d;
00260                                 mov   ECX, length;      // repeat count for the prefix below
00261                                 xor   EAX, EAX;         // answer = false; also sets ZF, so that a
00262                                                         // zero length falls through as "equal"
00263                                 cld;                    // walk upwards through memory
00264                                 repz;                   // repeat while ECX != 0 and the last pair matched
00265                                 cmpsb;                  // compare the byte at [ESI] with the one at [EDI]
00266                                 jnz   fail;             // a pair differed: keep EAX = 0
00267                                 inc   EAX;              // every pair matched: EAX = 1
00268                         fail:;
00269                                 }
00270                         }
00271                 }        
00272 
00273                 static if (is(T == wchar))
00274                 {
00275                         static wchar* locate (wchar* s, wchar match, int length)
00276                         {       // address of the first 'match' among the 'length' wchars at s, else null
00277                                 asm 
00278                                 {
00279                                 mov   EDI, s;
00280                                 mov   ECX, length; 
00281                                 movzx EAX, match;       // AX = wchar to scan for
00282                                 test  ECX, ECX;         // a repeat count of 0 compares nothing and
00283                                 jle   none;             // would leave a stale ZF for the jz below
00284                                 cld;                    // scan upwards through memory
00285                                 repnz;                  // repeat until a hit or ECX reaches 0
00286                                 scasw;
00287                                 lea   EAX, [EDI-2];     // EDI is one wchar past the hit; lea keeps ZF
00288                                 jz    done;
00289                         none:
00290                                 xor   EAX, EAX;         // not found (or nothing to scan): null
00291                         done:;
00292                                 }
00293                         }
00294 
00295                         static bool equal (wchar* s, wchar* d, int length)
00296                         {       // true when the 'length' wchars at s and at d are identical
00297                                 asm                     // no D return statement; presumably EAX carries the result (confirm vs D ABI)
00298                                 {
00299                                 mov   EDI, s;
00300                                 mov   ESI, d;
00301                                 mov   ECX, length;      // repeat count for the prefix below
00302                                 xor   EAX, EAX;         // answer = false; also sets ZF, so that a
00303                                                         // zero length falls through as "equal"
00304                                 cld;                    // walk upwards through memory
00305                                 repz;                   // repeat while ECX != 0 and the last pair matched
00306                                 cmpsw;                  // compare the word at [ESI] with the one at [EDI]
00307                                 jnz   fail;             // a pair differed: keep EAX = 0
00308                                 inc   EAX;              // every pair matched: EAX = 1
00309                         fail:;
00310                                 }
00311                         }
00312                 }        
00313 
00314                 static if (is(T == dchar))
00315                 {
00316                         static dchar* locate (dchar* s, dchar match, int length)
00317                         {       // address of the first 'match' among the 'length' dchars at s, else null
00318                                 asm 
00319                                 {
00320                                 mov   EDI, s;
00321                                 mov   ECX, length; 
00322                                 mov   EAX, match;       // EAX = dchar to scan for
00323                                 test  ECX, ECX;         // a repeat count of 0 compares nothing and
00324                                 jle   none;             // would leave a stale ZF for the jz below
00325                                 cld;                    // scan upwards through memory
00326                                 repnz;                  // repeat until a hit or ECX reaches 0
00327                                 scasd;
00328                                 lea   EAX, [EDI-4];     // EDI is one dchar past the hit; lea keeps ZF
00329                                 jz    done;
00330                         none:
00331                                 xor   EAX, EAX;         // not found (or nothing to scan): null
00332                         done:;
00333                                 }
00334                         }
00335 
00336                         static bool equal (dchar* s, dchar* d, int length)
00337                         {       // true when the 'length' dchars at s and at d are identical
00338                                 asm                     // no D return statement; presumably EAX carries the result (confirm vs D ABI)
00339                                 {
00340                                 mov   EDI, s;
00341                                 mov   ESI, d;
00342                                 mov   ECX, length;      // repeat count for the prefix below
00343                                 xor   EAX, EAX;         // answer = false; also sets ZF, so that a
00344                                                         // zero length falls through as "equal"
00345                                 cld;                    // walk upwards through memory
00346                                 repz;                   // repeat while ECX != 0 and the last pair matched
00347                                 cmpsd;                  // compare the dword at [ESI] with the one at [EDI]
00348                                 jnz   fail;             // a pair differed: keep EAX = 0
00349                                 inc   EAX;              // every pair matched: EAX = 1
00350                         fail:;
00351                                 }
00352                         }
00353                 }    
00354         }
00355         else
00356         {
00357                 static T* locate (T* s, T match, int len)
00358                 {       // first element equal to 'match' among the 'len' at s, else null
00359                         for (; len; --len, ++s)
00360                                if (*s == match)
00361                                    return s;
00362                         return null;
00363                 }
00364                 
00365                 static bool equal (T* s, T* d, int len)
00366                 {       // true when the 'len' elements at s match those at d
00367                         for (; len; --len, ++s, ++d)
00368                                if (*s != *d)
00369                                    return false;
00370                         return true;
00371                 }
00372                 
00373         }    
00374 }
00375 
00376 
00377 /******************************************************************************
00378 
00379         Text is TextTemplate instantiated for char, so that the functions
00380         above can be called on char[] content as Text.trim(), 
00381         Text.indexOf() and so on. Other character widths are reached 
00382         through TextTemplate!(wchar) or TextTemplate!(dchar)
00383 
00384 ******************************************************************************/
00385 
00386 alias TextTemplate!(char) Text;
00387 
00388 
00389 
00390 /******************************************************************************
00391         Sanity checks for the char instantiation. A failing assert is left
00392         to propagate, so a regression fails the test run instead of printing
00393 ******************************************************************************/
00394 
00395 unittest
00396 {
00397         char[] test = "123456789";
00398 
00399         assert (Text.locate (test, 'a', test.length) == null);
00400         assert (Text.locate (test, '3', test.length) == &test[2]);
00401         assert (Text.locate (test, '1', test.length) == &test[0]);
00402 
00403         assert (Text.equal (test, test, test.length));
00404         assert (!Text.equal (test, "qwe", 3));
00405         assert (Text.indexOf (test, '5') == 4);
00406         assert (Text.trim (" 12 ") == "12");
00407 }

Generated on Sat Dec 24 17:28:33 2005 for Mango by  doxygen 1.4.0