Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

Uri.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Uri.d
00004         
00005         Copyright (C) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026 
00027                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00028 
00029         
00030         @version        Initial version, April 2004      
00031         @author         Kris
00032 
00033 
00034 *******************************************************************************/
00035 
00036 module mango.io.Uri;
00037 
00038 private import  std.ctype;
00039 
00040 private import  mango.utils.Text,
00041                 mango.utils.HeapSlice;
00042 
00043 private import  mango.io.Buffer,
00044                 mango.io.Exception;
00045 
00046 private import  mango.io.model.IWriter;
00047 
00048 /*******************************************************************************
00049 
00050 *******************************************************************************/
00051 
00052 extern (C) char* memchr (char *, char, uint);
00053 
00054 /*******************************************************************************
00055 
00056         Implements an RFC 2396 compliant URI specification. See 
00057         <A HREF="http://ftp.ics.uci.edu/pub/ietf/uri/rfc2396.txt">this page</A>
00058         for more information. 
00059 
00060         The implementation fails the spec on two counts: it doesn't insist
00061         on a scheme being present in the Uri, and it doesn't implement the
00062         "Relative References" support noted in section 5.2
00063 
00064         Use the MutableUri derivative where you need to alter specific uri
00065         attributes.
00066 
00067 *******************************************************************************/
00068 
class Uri : IWritable
{
        /***********************************************************************
        
                Initialize the scheme/port table and the character map.

                Each map entry is a bit-set. An Exc* flag means "this
                character terminates that element while parsing"; an
                Inc* flag means "this character may be written verbatim
                within that element" (anything else is %hex escaped by
                encode).

        ***********************************************************************/

        static this ()
        {
                error = new IOException ("Invalid URI specification");

                // Map known generic schemes to their default port. Schemes
                // that don't use ports are registered with InvalidPort.
                foreach (SchemePort sp; schemePorts)
                         genericSchemes[sp.name] = sp.port;
                genericSchemes.rehash;

                // alphanumerics are legal within every element
                for (int i='a'; i <= 'z'; ++i)  
                     map[i] = IncGeneric;

                for (int i='A'; i <= 'Z'; ++i)  
                     map[i] = IncGeneric;

                for (int i='0'; i<='9'; ++i)  
                     map[i] = IncGeneric;

                // delimiters that terminate elements during a parse
                map[':'] |= ExcScheme;
                map['/'] |= ExcScheme | ExcAuthority;
                map['?'] |= ExcScheme | ExcAuthority | ExcPath;
                map['#'] |= ExcScheme | ExcAuthority | ExcPath | ExcQuery;

                // unreserved "mark" symbols. RFC 2396 (section 3.3) lists
                // unreserved characters as legal path characters too, so
                // they must carry IncPath; without it a path such as
                // "/index.html" would be written as "/index%2ehtml"
                include ("-_.!~*'()", IncUser | IncQuery | IncPath);

                // additional scheme symbols
                include ("+-.", IncScheme);

                // additional userinfo symbols
                include (";:&=+$,", IncUser);

                // additional path symbols
                include ("/;:@&=+$,", IncPath);

                // additional query symbols
                include (";/?:@&=+$,", IncQuery);
        }

        /***********************************************************************
        
                Add the given flags to the map entry of each character
                within the provided set.

        ***********************************************************************/

        private static void include (char[] chars, int flags)
        {
                foreach (char c; chars)
                         map[c] |= flags;
        }
        
        /***********************************************************************
        
                Construct a Uri from the provided character string. The
                string is parsed immediately.

        ***********************************************************************/

        this (char[] uri)
        {
                this();
                parse (uri);
        }

        /***********************************************************************
        
                Return the default port for the given scheme. InvalidPort
                is returned if the scheme is unknown, or does not accept
                a port.

                The table is probed with 'in' rather than indexed directly,
                so that a lookup of an unknown scheme neither adds an entry
                to the shared table nor faults on the missing key.

        ***********************************************************************/

        final static int getDefaultPort (char[] scheme)
        {
                if (scheme in genericSchemes)
                   {
                   int port = genericSchemes [scheme]; 
                   if (port)
                       return port;
                   }
                return InvalidPort;
        }

        /***********************************************************************
        
                Return the parsed scheme, or null if the scheme was not
                specified

        ***********************************************************************/

        char[] getScheme()
        {
                return scheme;
        }

        /***********************************************************************
        
                Return the parsed host, or null if the host was not
                specified

        ***********************************************************************/

        char[] getHost()
        {
                return host;
        }

        /***********************************************************************
        
                Return the parsed port number, or InvalidPort if the port
                was not provided.

        ***********************************************************************/

        int getPort()
        {
                return port;
        }

        /***********************************************************************
        
                Return the explicit port where one was specified; otherwise
                return the default port of the current scheme (which may 
                itself be InvalidPort).

        ***********************************************************************/

        int getValidPort()
        {
                if (port == InvalidPort)
                    return getDefaultPort (scheme);
                return port;
        }

        /***********************************************************************
        
                Return the parsed userinfo, or null if userinfo was not 
                provided.

        ***********************************************************************/

        char[] getUserInfo()
        {
                return userinfo;
        }

        /***********************************************************************
        
                Return the parsed path, or null if the path was not 
                provided.

        ***********************************************************************/

        char[] getPath()
        {
                return path;
        }

        /***********************************************************************
        
                Return the parsed query, or null if a query was not 
                provided.

        ***********************************************************************/

        char[] getQuery()
        {
                return query;
        }

        /***********************************************************************
        
                Return the parsed fragment, or null if a fragment was not 
                provided.

        ***********************************************************************/

        char[] getFragment()
        {
                return fragment;
        }

        /***********************************************************************
        
                Return whether or not the Uri scheme is considered generic;
                that is, whether it is present in the scheme/port table.

                Uses 'in' so that testing an unknown scheme leaves the
                shared table untouched.

        ***********************************************************************/

        bool isGeneric ()
        {
                if (scheme in genericSchemes)
                    return genericSchemes [scheme] != 0;
                return false;
        }

        /***********************************************************************
        
                Write the content of this Uri to the provided buffer. The
                output is constructed per RFC 2396: userinfo, path, query
                and fragment are %hex escaped where necessary, and a port 
                is emitted only where it differs from the scheme default.

                Returns the provided buffer.

        ***********************************************************************/

        IBuffer write (IBuffer buf)
        {
                if (scheme.length)
                    buf.append (scheme).append(":");

                // the authority section is present if any part of it is set
                if (userinfo.length || host.length || port != InvalidPort)
                   {
                   buf.append ("//");

                   if (userinfo.length)
                       encode (buf, userinfo, IncUser).append("@");

                   if (host.length)
                       buf.append (host);

                   if (port != InvalidPort && port != getDefaultPort(scheme))
                      {
                      // large enough for any int, including a sign. This
                      // assumes Text.itoa returns just the populated slice
                      // of the scratch buffer -- TODO confirm
                      char[12] tmp;
                      buf.append(":").append(Text.itoa (tmp, port));
                      }
                   }

                if (path.length)
                    encode (buf, path, IncPath);

                if (query.length)
                   {
                   buf.append ("?");
                   encode (buf, query, IncQuery);
                   }

                // a fragment is escaped using the query character set
                if (fragment.length)
                   {
                   buf.append ("#");
                   encode (buf, fragment, IncQuery);
                   }

                return buf;
        }

        /***********************************************************************
        
                Write the content of this Uri to the buffer of the provided
                writer. The output is constructed per RFC 2396

        ***********************************************************************/

        void write (IWriter writer)
        {
                write (writer.getBuffer);
        }

        /***********************************************************************
        
                Convert this Uri to a character string, by writing it into
                a freshly allocated growable buffer.

        ***********************************************************************/

        override char[] toString ()
        {
                return write (new GrowableBuffer(1024)).toString;
        }

        /***********************************************************************
        
                Decode a character string with potential %hex values in it.

                Where the input has no '%' the original string is returned
                untouched. Otherwise the decoded text is placed into the
                expanding buffer owned by this instance, and a slice of it
                is returned to the requestor. 

                A '%' that is not followed by at least two characters is
                copied through verbatim.

        ***********************************************************************/

        char[] decode (char[] s)
        {
                int length = s.length;

                // take a peek first, to see if there's work to do
                if (length && memchr (s, '%', length))
                   {
                   char* p;
                   int   j;
                        
                   // decoded output is never longer than the input
                   p = cast(char*) decoded.expand (length);

                   // scan string, stripping % encodings as we go. Here
                   // i indexes the input whilst j counts output chars
                   for (int i; i < length; ++i, ++j, ++p)
                       {
                       char c = s[i];
                       if (c == '%' && (i+2) < length)
                          {
                          // convert the two hex digits, then skip them
                          c = Text.atoi (s[i+1..i+3], 16);
                          i += 2;
                          }
                       *p = c;
                       }

                   // return a slice from the decoded input
                   return cast(char[]) decoded.slice (j);
                   }

                // return original content
                return s;
        }   

        /***********************************************************************
        
                Encode uri characters into an output buffer, such that
                every char whose map entry lacks the given flags is
                converted into its %hex version. Runs of acceptable chars
                are copied to the buffer in a single operation.

                Returns the provided buffer.

        ***********************************************************************/

        private static IBuffer encode (IBuffer buf, char[] s, int flags)
        {
                char[3] hex;
                int     mark;

                hex[0] = '%';
                foreach (int i, char c; s)
                        {
                        if (! (map[c] & flags))
                           {
                           // flush the pending run of acceptable chars
                           buf.put (&s[mark], i - mark);
                           mark = i+1;
                                
                           hex[1] = hexDigits [(c >> 4) & 0x0f];
                           hex[2] = hexDigits [c & 0x0f];
                           buf.append (hex);
                           }
                        }

                // add trailing section
                if (mark < s.length)
                    buf.put (&s[mark], s.length - mark);

                return buf;
        }

        /***********************************************************************
        
                This should not be exposed outside of this module! It
                creates an empty Uri with no port and a fresh decode buffer.

        ***********************************************************************/

        private this ()
        {
                port = InvalidPort;
                decoded = new HeapSlice (256);
        }

        /***********************************************************************
        
                Parsing is performed according to RFC 2396
                
                @code
                  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
                   12            3  4          5       6  7        8 9
                    
                2 isolates scheme
                4 isolates authority
                5 isolates path
                7 isolates query
                9 isolates fragment
                @endcode

                This was originally a state-machine; it turned out to be a 
                lot faster (~40%) when unwound like this instead.

                Note that the scheme is a slice of the provided string, and
                is passed to Text.tolower; presumably that lowercases the
                caller's text in place -- TODO confirm. Only those elements 
                found in the input are assigned (the path is always
                assigned); other attributes retain their prior values.
                
        ***********************************************************************/

        private void parse (char[] uri)
        {
                char    c;
                int     i, 
                        mark, 
                        len = uri.length;

                // isolate scheme (note that it's OK to not specify a scheme).
                // The scan halts upon the first of ":/?#" and a scheme exists
                // only where that character is a colon
                for (i=0; i < len && !(map[c = uri[i]] & ExcScheme); ++i) {}
                if (c == ':')
                   {
                   scheme = uri[mark..i];
                   Text.tolower (scheme);
                   mark = i + 1;
                   }

                // isolate authority, which is introduced via "//"
                if (mark < len-1  &&  uri[mark] == '/'  &&  uri[mark+1] == '/')
                   {
                   for (mark+=2, i=mark; i < len && !(map[uri[i]] & ExcAuthority); ++i) {}
                   parseAuthority (uri[mark..i]); 
                   mark = i;
                   }

                // isolate path
                for (i=mark; i < len && !(map[uri[i]] & ExcPath); ++i) {}
                path = decode (uri[mark..i]);
                mark = i;

                // isolate query
                if (mark < len && uri[mark] == '?')
                   {
                   for (++mark, i=mark; i < len && uri[i] != '#'; ++i) {}
                   query = decode (uri[mark..i]);
                   mark = i;
                   }

                // isolate fragment
                if (mark < len && uri[mark] == '#')
                    fragment = decode (uri[mark+1..len]);
        }
        
        /***********************************************************************
        
                Authority is the section after the scheme, but before the 
                path, query or fragment; it typically represents a host.
               
                @code
                    ^(([^@]*)@?)([^:]*)?(:(.*))?
                     12         3       4 5
                  
                2 isolates userinfo
                3 isolates host
                5 isolates port
                @endcode

                An empty port (as in "host:") is permitted by RFC 2396 and
                is treated as though no port were specified.

        ***********************************************************************/

        private void parseAuthority (char[] auth)
        {
                int     mark,
                        len = auth.length;

                // get userinfo: (([^@]*)@?)
                foreach (int i, char c; auth)
                         if (c == '@')
                            {
                            userinfo = decode (auth[0..i]);
                            mark = i + 1;
                            break;
                            }

                // get port: (:(.*))?
                for (int i=mark; i < len; ++i)
                     if (auth [i] == ':')
                        {
                        // convert only when there's something after the
                        // colon; otherwise the port remains as it was
                        if (i+1 < len)
                            port = Text.atoi (auth [i+1..len]);
                        len = i;
                        break;
                        }

                // get host: ([^:]*)?
                host = auth [mark..len];
        }


        /***********************************************************************
        
                Class attributes

        ***********************************************************************/

        // indicates "no port"; also the table value for portless schemes
        const int               InvalidPort = -1;

        private int             port;
        private char[]          host,
                                path,
                                query,
                                scheme,
                                userinfo,
                                fragment;

        // holds the output of decode() for this instance
        private HeapSlice       decoded;

        // per-character Inc*/Exc* flags, populated by the static ctor
        private static ubyte    map[256];

        // scheme name => default port, populated by the static ctor
        private static short[char[]] genericSchemes;

        private static const char[] hexDigits = "0123456789abcdef";

        // constructed by the static ctor; not referenced within this class
        private static IOException error;

        private enum    {
                        ExcScheme       = 0x01, 
                        ExcAuthority    = 0x02, 
                        ExcPath         = 0x04, 
                        ExcQuery        = 0x08, 
                        IncUser         = 0x10, 
                        IncPath         = 0x20,
                        IncQuery        = 0x40,
                        IncScheme       = 0x80,
                        IncGeneric      = IncScheme | IncUser | IncPath | IncQuery
                        };

        private struct SchemePort
        {
                        char[]  name;
                        int     port;
        }

        private static  const SchemePort[] schemePorts =
                        [
                        {"coffee",      80},
                        {"file",        InvalidPort},
                        {"ftp",         21},
                        {"gopher",      70},
                        {"hnews",       80},
                        {"http",        80},
                        {"http-ng",     80},
                        {"https",       443},
                        {"imap",        143},
                        {"irc",         194}, 
                        {"ldap",        389},
                        {"news",        119},
                        {"nfs",         2049}, 
                        {"nntp",        119},
                        {"pop",         110}, 
                        {"prospero",    1525},
                        {"rwhois",      4321},
                        {"sip",         InvalidPort},
                        {"sips",        InvalidPort},
                        {"sipt",        InvalidPort},
                        {"sipu",        InvalidPort},
                        {"shttp",       80},
                        {"smtp",        25},
                        {"snews",       563},
                        {"telnet",      23},
                        {"vemmi",       575},
                        {"videotex",    516},
                        {"wais",        210},
                        {"whois",       43},
                        {"whois++",     43},
                        ];
}
00622 
00623 
00624 /*******************************************************************************
00625 
00626         Mutable version of Uri
00627 
00628 *******************************************************************************/
00629 
class MutableUri : Uri
{
        /***********************************************************************
        
                Create an empty Uri

        ***********************************************************************/

        this ()
        {
                super();
        }

        /***********************************************************************
        
                Create a Uri from the provided text string, which is 
                parsed immediately.

        ***********************************************************************/

        this (char[] uri)
        {
                super (uri);
        }

        /***********************************************************************
        
                Construct a Uri from the given components. The query is
                optional. The components are stored as provided; they are
                neither parsed nor decoded.
                
        ***********************************************************************/

        this (char[] scheme, char[] host, char[] path, char[] query = null)
        {
                super();

                this.scheme = scheme;
                this.host = host;
                this.path = path;
                this.query = query;
        }

        /***********************************************************************
        
                Clone another Uri. This can be used to make a MutableUri
                from an immutable Uri. 

                Note that the attribute arrays are assigned rather than 
                duplicated, so the clone references the same text as the 
                original.

        ***********************************************************************/

        static MutableUri clone (Uri uri)
        {
                MutableUri ret = new MutableUri (uri.scheme, uri.host, 
                                                 uri.path, uri.query);
                ret.userinfo = uri.userinfo;
                ret.fragment = uri.fragment;
                ret.port = uri.port;
                return ret;
        }

        /***********************************************************************
        
                Clear everything to null, set the port to InvalidPort, 
                and reset the decode buffer.

        ***********************************************************************/

        void reset()
        {
                decoded.reset();
                port = InvalidPort;
                host = path = query = scheme = userinfo = fragment = null;
        }

        /***********************************************************************
        
                Parse the given uri string, replacing the current content
                of this instance. 

                The underlying parser assigns only those elements present
                in the input, so prior state is cleared first; otherwise 
                a reused instance would retain (for example) the query or 
                port of the previously parsed uri.

                Returns this instance, to support call chaining.

        ***********************************************************************/

        MutableUri parse (char[] uri)
        {       
                reset();
                super.parse (uri);
                return this;
        }

        /***********************************************************************
                
                Set the Uri scheme. Returns this instance.

        ***********************************************************************/

        MutableUri setScheme (char[] scheme)
        {
                this.scheme = scheme;
                return this;
        }

        /***********************************************************************
        
                Set the Uri host. Returns this instance.

        ***********************************************************************/

        MutableUri setHost (char[] host)
        {
                this.host = host;
                return this;
        }

        /***********************************************************************
        
                Set the Uri port. Returns this instance.

        ***********************************************************************/

        MutableUri setPort (int port)
        {
                this.port = port;
                return this;
        }

        /***********************************************************************
        
                Set the Uri userinfo. Returns this instance.

        ***********************************************************************/

        MutableUri setUserInfo(char[] userinfo)
        {
                this.userinfo = userinfo;
                return this;
        }

        /***********************************************************************
        
                Set the Uri query. Returns this instance.

        ***********************************************************************/

        MutableUri setQuery (char[] query)
        {
                this.query = query;
                return this;
        }

        /***********************************************************************
        
                Set the Uri path. Returns this instance.

        ***********************************************************************/

        MutableUri setPath (char[] path)
        {
                this.path = path;
                return this;
        }

        /***********************************************************************
        
                Set the Uri fragment. Returns this instance.

        ***********************************************************************/

        MutableUri setFragment (char[] fragment)
        {
                this.fragment = fragment;
                return this;
        }
}
00799 
00800 
00801 

Generated on Sun Oct 24 22:31:17 2004 for Mango by doxygen 1.3.6