Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

Uri.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Uri.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032         
00033         @version        Initial version, April 2004      
00034         @author         Kris
00035 
00036 
00037 *******************************************************************************/
00038 
00039 module mango.io.Uri;
00040 
00041 private import  mango.io.Buffer,
00042                 mango.io.Exception;
00043 
00044 private import  mango.format.Int;
00045 
00046 private import  mango.utils.Text,
00047                 mango.utils.HeapSlice;
00048 
00049 private import  mango.io.model.IWriter;
00050 
00051 /*******************************************************************************
00052         Binding to the C runtime memchr(); Uri.decode() uses it to look for '%'.
00053 *******************************************************************************/
00054 // NOTE(review): ISO C declares memchr (const void*, int, size_t) - confirm types
00055 extern (C) char* memchr (char *, char, uint);
00056 
00057 /*******************************************************************************
00058 
00059         Implements an RFC 2396 compliant URI specification. See 
00060         <A HREF="http://ftp.ics.uci.edu/pub/ietf/uri/rfc2396.txt">this page</A>
00061         for more information. 
00062 
00063         The implementation deviates from the spec on two counts: it doesn't
00064         insist on a scheme being present in the Uri, and it doesn't implement
00065         the "Relative References" support noted in section 5.2. Note that IRI
00066         support can be added by assuming that each of userinfo, path, query,
00067         and fragment is UTF-8 encoded
00068         (see <A HREF="http://www.w3.org/2001/Talks/0912-IUC-IRI/paper.html">
00069         this page</A> for further details).
00070 
00071         Use the MutableUri derivative where you need to alter specific uri
00072         attributes. 
00073 
00074 *******************************************************************************/
00075 
00076 class Uri : IWritable
00077 {
00078         /***********************************************************************
00079         
00080                 Initialize the shared error instance, the scheme-to-port
00081                 table, and the per-character classification map
00082         ***********************************************************************/
00083 
00084         static this ()
00085         {
00086                 error = new IOException ("Invalid URI specification");
00087 
00088                 // Map known generic schemes to their default port. Specify
00089                 // InvalidPort for those schemes that don't use ports. Note
00090                 // that a port value of zero is not supported ...
00091                 foreach (SchemePort sp; schemePorts)
00092                          genericSchemes[sp.name] = sp.port;
00093                 genericSchemes.rehash;
00094 
00095                 // alphanumerics are accepted, unescaped, in every component
00096                 for (int i='a'; i <= 'z'; ++i)  
00097                      map[i] = IncGeneric;
00098 
00099                 for (int i='A'; i <= 'Z'; ++i)  
00100                      map[i] = IncGeneric;
00101 
00102                 for (int i='0'; i<='9'; ++i)  
00103                      map[i] = IncGeneric;
00104 
00105                 // delimiters: each one stops parse() scanning the named parts
00106                 map[':'] |= ExcScheme;
00107                 map['/'] |= ExcScheme | ExcAuthority;
00108                 map['?'] |= ExcScheme | ExcAuthority | ExcPath;
00109                 map['#'] |= ExcScheme | ExcAuthority | ExcPath | ExcQuery;
00110                 // RFC 2396 unreserved "mark" symbols: valid as-is in userinfo,
00111                 // path and query (lacking IncPath, "/a.b" was written "/a%2eb")
00112                 map['-'] |= IncUser | IncPath | IncQuery;
00113                 map['_'] |= IncUser | IncPath | IncQuery;
00114                 map['.'] |= IncUser | IncPath | IncQuery;
00115                 map['!'] |= IncUser | IncPath | IncQuery;
00116                 map['~'] |= IncUser | IncPath | IncQuery;
00117                 map['*'] |= IncUser | IncPath | IncQuery;
00118                 map['\''] |= IncUser | IncPath | IncQuery;
00119                 map['('] |= IncUser | IncPath | IncQuery;
00120                 map[')'] |= IncUser | IncPath | IncQuery;
00121 
00122                 // additional symbols accepted within a scheme name
00123                 map['+'] |= IncScheme;
00124                 map['-'] |= IncScheme;
00125                 map['.'] |= IncScheme;
00126 
00127                 // symbols encode() leaves unescaped within userinfo
00128                 map[';'] |= IncUser;
00129                 map[':'] |= IncUser;
00130                 map['&'] |= IncUser;
00131                 map['='] |= IncUser;
00132                 map['+'] |= IncUser;
00133                 map['$'] |= IncUser;
00134                 map[','] |= IncUser;
00135 
00136                 // symbols encode() leaves unescaped within a path
00137                 map['/'] |= IncPath;
00138                 map[';'] |= IncPath;
00139                 map[':'] |= IncPath;
00140                 map['@'] |= IncPath;
00141                 map['&'] |= IncPath;
00142                 map['='] |= IncPath;
00143                 map['+'] |= IncPath;
00144                 map['$'] |= IncPath;
00145                 map[','] |= IncPath;
00146 
00147                 // symbols encode() leaves unescaped within a query or fragment
00148                 map[';'] |= IncQuery;
00149                 map['/'] |= IncQuery;
00150                 map['?'] |= IncQuery;
00151                 map[':'] |= IncQuery;
00152                 map['@'] |= IncQuery;
00153                 map['&'] |= IncQuery;
00154                 map['='] |= IncQuery;
00155                 map['+'] |= IncQuery;
00156                 map['$'] |= IncQuery;
00157                 map[','] |= IncQuery;
00158         }
00159         
00160         /***********************************************************************
00161 
00162                 Create a Uri by parsing the given text. The private
00163                 no-argument constructor runs first, then parse() is applied.
00164         ***********************************************************************/
00165 
00166         this (char[] uri)
00167         {
00168                 this ();
00169                 this.parse (uri);
00170         }
00171 
00172         /***********************************************************************
00173 
00174                 Return the default port for the given scheme. InvalidPort
00175                 is returned if the scheme is unknown, or does not accept
00176                 a port. The table is probed with 'in', so an unknown scheme
00177                 is never inserted into (nor faults on) the shared table.
00178         ***********************************************************************/
00179 
00180         final static int getDefaultPort (char[] scheme)
00181         {
00182                 // only index the table once the key is known to be present
00183                 if (scheme in genericSchemes)
00184                     return genericSchemes [scheme];
00185                 return InvalidPort;
00186         }
00187 
00188         /***********************************************************************
00189 
00190                 Accessor for the scheme component. Yields null when no
00191                 scheme has been parsed or set.
00192 
00193         ***********************************************************************/
00194 
00195         char[] getScheme()
00196         {
00197                 return this.scheme;
00198         }
00199 
00200         /***********************************************************************
00201 
00202                 Accessor for the host component. Yields null when no host
00203                 has been parsed or set.
00204 
00205         ***********************************************************************/
00206 
00207         char[] getHost()
00208         {
00209                 return this.host;
00210         }
00211 
00212         /***********************************************************************
00213 
00214                 Accessor for the port number. Yields InvalidPort when no
00215                 port has been parsed or set.
00216 
00217         ***********************************************************************/
00218 
00219         int getPort()
00220         {
00221                 return this.port;
00222         }
00223 
00224         /***********************************************************************
00225 
00226                 Return the explicit port when there is one; otherwise the
00227                 result of getDefaultPort() for the current scheme.
00228 
00229         ***********************************************************************/
00230 
00231         int getValidPort()
00232         {
00233                 // an explicit port wins over the scheme's default
00234                 return (port != InvalidPort) ? port
00235                                              : getDefaultPort (scheme);
00236         }
00237 
00238         /***********************************************************************
00239 
00240                 Accessor for the userinfo component (null when absent).
00241                 Text produced by parsing has been through decode().
00242 
00243         ***********************************************************************/
00244 
00245         char[] getUserInfo()
00246         {
00247                 return this.userinfo;
00248         }
00249 
00250         /***********************************************************************
00251 
00252                 Accessor for the path component (null when absent).
00253                 Text produced by parsing has been through decode().
00254 
00255         ***********************************************************************/
00256 
00257         char[] getPath()
00258         {
00259                 return this.path;
00260         }
00261 
00262         /***********************************************************************
00263 
00264                 Accessor for the query component (null when absent).
00265                 Text produced by parsing has been through decode().
00266 
00267         ***********************************************************************/
00268 
00269         char[] getQuery()
00270         {
00271                 return this.query;
00272         }
00273 
00274         /***********************************************************************
00275 
00276                 Accessor for the fragment component (null when absent).
00277                 Text produced by parsing has been through decode().
00278 
00279         ***********************************************************************/
00280 
00281         char[] getFragment()
00282         {
00283                 return this.fragment;
00284         }
00285 
00286         /***********************************************************************
00287                 Return whether or not the Uri scheme is considered generic,
00288                 i.e. whether it has an entry in the table of known schemes.
00289                 The probe uses 'in', so unknown schemes are never inserted.
00290         ***********************************************************************/
00291 
00292         bool isGeneric ()
00293         {
00294                 return (scheme in genericSchemes) && genericSchemes [scheme] != 0;
00295         }
00296 
00297         /***********************************************************************
00298         
00299                 Write the content of this Uri to the provided buffer. The
00300                 output is constructed per RFC 2396. Userinfo, path, query
00301                 and fragment pass through encode(); buf is returned.
00302         ***********************************************************************/
00303 
00304         IBuffer write (IBuffer buf)
00305         {
00306                 if (scheme.length)
00307                     buf.append (scheme).append(":");
00308 
00309                 // the "//" authority section appears when any part of it is set
00310                 if (userinfo.length || host.length || port != InvalidPort)
00311                    {
00312                    buf.append ("//");
00313 
00314                    if (userinfo.length)
00315                        encode (buf, userinfo, IncUser).append("@");
00316 
00317                    if (host.length)
00318                        buf.append (host);
00319                    // a port equal to the scheme's default is left out
00320                    if (port != InvalidPort && port != getDefaultPort(scheme))
00321                       {
00322                       char[16] tmp;   // was char[4]: too small for 5-digit ports
00323                       buf.append(":").append(Int.format (tmp, port));
00324                       }
00325                    }
00326 
00327                 if (path.length)
00328                     encode (buf, path, IncPath);
00329 
00330                 if (query.length)
00331                    {
00332                    buf.append ("?");
00333                    encode (buf, query, IncQuery);
00334                    }
00335                 // the fragment is escaped with the same flag set as the query
00336                 if (fragment.length)
00337                    {
00338                    buf.append ("#");
00339                    encode (buf, fragment, IncQuery);
00340                    }
00341 
00342                 return buf;
00343         }
00344 
00345         /***********************************************************************
00346 
00347                 Emit this Uri, formatted as by write (IBuffer), into the
00348                 buffer obtained from the given writer.
00349 
00350         ***********************************************************************/
00351 
00352         void write (IWriter writer)
00353         {
00354                 this.write (writer.getBuffer());
00355         }
00356 
00357         /***********************************************************************
00358                 Render this Uri as text: it is written into a new
00359                 GrowableBuffer (1024) whose toString result is returned.
00360         ***********************************************************************/
00361 
00362         override char[] toString ()
00363         {
00364                 IBuffer buf = write (new GrowableBuffer(1024));
00365                 return buf.toString();
00366         }
00367 
00368         /***********************************************************************
00369 
00370                 Strip %hex escapes from a character string. Input with no
00371                 '%' in it is returned as-is; otherwise the result is
00372                 written into this Uri's 'decoded' HeapSlice, and a slice
00373                 obtained from that HeapSlice is returned.
00374         ***********************************************************************/
00375 
00376         char[] decode (char[] s)
00377         {
00378                 int len = s.length;
00379 
00380                 // empty input, or no escapes present: nothing to copy
00381                 if (!len || !memchr (s, '%', len))
00382                     return s;
00383 
00384                 // the output is never longer than the input
00385                 char* dst   = cast(char*) decoded.expand (len);
00386                 int   count = 0;
00387 
00388                 for (int i = 0; i < len; ++i)
00389                     {
00390                     int c = s[i];
00391 
00392                     // fold a complete "%XX" triple into one character;
00393                     // a '%' too close to the end is copied through as-is
00394                     if (c == '%' && (i+2) < len)
00395                        {
00396                        c = Int.parse (s[i+1..i+3], 16);
00397                        i += 2;
00398                        }
00399 
00400                     *dst++ = c;
00401                     ++count;
00402                     }
00403 
00404                 // ask the HeapSlice for the 'count' chars just written
00405                 // NOTE(review): relies on HeapSlice.slice() semantics - confirm
00406                 return cast(char[]) decoded.slice (count);
00407         }   
00408 
00409         /***********************************************************************
00410 
00411                 Append s to buf, replacing each character whose map entry
00412                 has none of the given flags set with a %xx escape (lower
00413                 case hex digits). buf is returned to allow call chaining.
00414         ***********************************************************************/
00415 
00416         private static IBuffer encode (IBuffer buf, char[] s, int flags)
00417         {
00418                 char[3] escape;
00419                 int     start = 0;
00420 
00421                 escape[0] = '%';
00422                 for (int pos = 0; pos < s.length; ++pos)
00423                     {
00424                     char c = s[pos];
00425                     // permitted characters just extend the pending run
00426                     if (map[c] & flags)
00427                         continue;
00428 
00429                     // flush the pending run, then emit the offender as %xx
00430                     buf.append (s[start..pos]);
00431                     escape[1] = hexDigits [(c >> 4) & 0x0f];
00432                     escape[2] = hexDigits [c & 0x0f];
00433                     buf.append (escape);
00434                     start = pos + 1;
00435                     }
00436 
00437                 if (start < s.length)       // trailing run, if any
00438                     buf.append (s[start..s.length]);
00439                 return buf;
00440         }
00441 
00442         /***********************************************************************
00443 
00444                 Default construction: port is InvalidPort and 'decoded' is
00445                 a new HeapSlice (256). Not to be exposed outside this module!
00446         ***********************************************************************/
00447 
00448         private this ()
00449         {
00450                 this.decoded = new HeapSlice (256);
00451                 this.port = InvalidPort;
00452         }
00453 
00454         /***********************************************************************
00455                 Parsing follows the RFC 2396 reference expression:
00456                 @code
00457                   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
00458                    12            3  4          5       6  7        8 9
00459                     
00460                 2 isolates scheme
00461                 4 isolates authority
00462                 5 isolates path
00463                 7 isolates query
00464                 9 isolates fragment
00465                 @endcode
00466 
00467                 Each component is scanned by its own loop; per the original
00468                 author this ran ~40% faster than the earlier state-machine.
00469         ***********************************************************************/
00470 
00471         private void parse (char[] uri)
00472         {
00473                 char    c;
00474                 int     pos, start, limit = uri.length;
00475 
00476                 // scheme (optional): only a scan stopped by ':' yields one
00477                 while (pos < limit && !(map[c = uri[pos]] & ExcScheme))
00478                        ++pos;
00479                 if (c == ':')
00480                    {
00481                    scheme = uri[start..pos];
00482                    Text.tolower (scheme);   // presumably in-place - confirm
00483                    start = pos + 1;
00484                    }
00485 
00486                 // authority: present only when introduced by "//"
00487                 if (start < limit-1  &&  uri[start] == '/'  &&  uri[start+1] == '/')
00488                    {
00489                    start += 2;
00490                    for (pos = start; pos < limit && !(map[uri[pos]] & ExcAuthority);)
00491                         ++pos;
00492                    parseAuthority (uri[start..pos]); 
00493                    start = pos;
00494                    }
00495 
00496                 // path: runs up to the first '?' or '#'
00497                 for (pos = start; pos < limit && !(map[uri[pos]] & ExcPath);)
00498                      ++pos;
00499                 path = decode (uri[start..pos]);
00500                 start = pos;
00501 
00502                 // query: introduced by '?', runs up to the first '#'
00503                 if (start < limit && uri[start] == '?')
00504                    {
00505                    for (pos = ++start; pos < limit && uri[pos] != '#';)
00506                         ++pos;
00507                    query = decode (uri[start..pos]);
00508                    start = pos;
00509                    }
00510 
00511                 // fragment: everything after the '#'
00512                 if (start < limit && uri[start] == '#')
00513                     fragment = decode (uri[start+1..limit]);
00514         }
00515         
00516         /***********************************************************************
00517         
00518                 Split the authority (the text between the leading "//" and
00519                 the path, query or fragment) into userinfo, host and port.
00520                
00521                 @code
00522                     ^(([^@]*)@?)([^:]*)?(:(.*))?
00523                      12         3       4 5
00524                   
00525                 2 isolates userinfo
00526                 3 isolates host
00527                 5 isolates port
00528                 @endcode
00529 
00530         ***********************************************************************/
00531 
00532         private void parseAuthority (char[] auth)
00533         {
00534                 int     start, stop = auth.length;
00535 
00536                 // userinfo: the text ahead of the first '@', if there is one
00537                 for (int at = 0; at < stop; ++at)
00538                      if (auth [at] == '@')
00539                         {
00540                         userinfo = decode (auth [0..at]);
00541                         start = at + 1;
00542                         break;
00543                         }
00544 
00545                 // port: the text after the first ':' beyond userinfo; the
00546                 // host then ends at that ':'
00547                 for (int colon = start; colon < stop; ++colon)
00548                      if (auth [colon] == ':')
00549                         {
00550                         port = Int.parse (auth [colon+1..stop]);
00551                         stop = colon;
00552                         break;
00553                         }
00554 
00555                 // host: whatever lies between the two
00556                 host = auth [start..stop];
00557         }
00558 
00559 
00560         /***********************************************************************
00561         
00562                 Class attributes: per-instance components, shared lookup
00563                 tables, and the flag bits stored in the character map
00564         ***********************************************************************/
00565         // "no port" marker; also the table value for schemes without a port
00566         const int               InvalidPort = -1;
00567         // the components, as parsed (or as assigned through MutableUri)
00568         private int             port;
00569         private char[]          host,
00570                                 path,
00571                                 query,
00572                                 scheme,
00573                                 userinfo,
00574                                 fragment;
00575         private HeapSlice       decoded;        // storage that decode() writes into
00576         // flag bits (see the enum below) for each of the 256 char values
00577         private static ubyte    map[256];
00578 
00579         // scheme name -> default port; filled from schemePorts by static this()
00580         private static short[char[]] genericSchemes;
00581         // digits used by encode() when building %xx escapes
00582         private static const char[] hexDigits = "0123456789abcdef";
00583         // allocated by static this(); not referenced elsewhere in this module
00584         private static IOException error;
00585         // bits held in map[]: Exc* mark delimiters, Inc* mark permitted chars
00586         private enum    {
00587                         ExcScheme       = 0x01,         // ends the scheme scan in parse()
00588                         ExcAuthority    = 0x02,         // ends the authority scan in parse()
00589                         ExcPath         = 0x04,         // ends the path scan in parse()
00590                         ExcQuery        = 0x08,         // set for '#'; not tested in this module
00591                         IncUser         = 0x10,         // encode() keeps the char in userinfo
00592                         IncPath         = 0x20,         // encode() keeps the char in a path
00593                         IncQuery        = 0x40,         // likewise for query and fragment
00594                         IncScheme       = 0x80,         // set only; not tested in this module
00595                         IncGeneric      = IncScheme | IncUser | IncPath | IncQuery
00596                         };
00597         // one row of the default-port table below
00598         private struct SchemePort
00599         {
00600                         char[]  name;
00601                         int     port;
00602         }
00603         // known schemes and their default port; loaded into genericSchemes
00604         private static  const SchemePort[] schemePorts =
00605                         [
00606                         {"coffee",      80},
00607                         {"file",        InvalidPort},
00608                         {"ftp",         21},
00609                         {"gopher",      70},
00610                         {"hnews",       80},
00611                         {"http",        80},
00612                         {"http-ng",     80},
00613                         {"https",       443},
00614                         {"imap",        143},
00615                         {"irc",         194}, 
00616                         {"ldap",        389},
00617                         {"news",        119},
00618                         {"nfs",         2049}, 
00619                         {"nntp",        119},
00620                         {"pop",         110}, 
00621                         {"prospero",    1525},
00622                         {"rwhois",      4321},
00623                         {"sip",         InvalidPort},
00624                         {"sips",        InvalidPort},
00625                         {"sipt",        InvalidPort},
00626                         {"sipu",        InvalidPort},
00627                         {"shttp",       80},
00628                         {"smtp",        25},
00629                         {"snews",       563},
00630                         {"telnet",      23},
00631                         {"vemmi",       575},
00632                         {"videotex",    516},
00633                         {"wais",        210},
00634                         {"whois",       43},
00635                         {"whois++",     43},
00636                         ];
00637 }
00638 
00639 
00640 /*******************************************************************************
00641 
00642         Mutable version of Uri: adds setters, reset(), clone() and a public,
00643         re-usable parse() on top of the read-only Uri accessors.
00644 *******************************************************************************/
00645 
00646 class MutableUri : Uri
00647 {
00648         /***********************************************************************
00649         
00650                 Create an empty Uri
00651 
00652         ***********************************************************************/
00653 
00654         this ()
00655         {
00656                 super();
00657         }
00658 
00659         /***********************************************************************
00660         
00661                 Create a Uri from the provided text string.
00662 
00663         ***********************************************************************/
00664 
00665         this (char[] uri)
00666         {
00667                 super (uri);
00668         }
00669 
00670         /***********************************************************************
00671         
00672                 Construct a Uri from the given components. The query is
00673                 optional. The arrays are stored as given (not copied).
00674                 
00675         ***********************************************************************/
00676 
00677         this (char[] scheme, char[] host, char[] path, char[] query = null)
00678         {
00679                 super();
00680 
00681                 this.scheme = scheme;
00682                 this.query = query;
00683                 this.host = host;
00684                 this.path = path;
00685         }
00686 
00687         /***********************************************************************
00688         
00689                 Clone another Uri. This can be used to make a MutableUri
00690                 from an immutable Uri. The component arrays are assigned,
00691                 not duplicated, so the clone refers to the source's text.
00692         ***********************************************************************/
00693 
00694         static MutableUri clone (Uri uri)
00695         {
00696                 with (uri)
00697                      {
00698                      MutableUri ret = new MutableUri (scheme, host, path, query);
00699                      ret.userinfo = userinfo;
00700                      ret.fragment = fragment;
00701                      ret.port = port;
00702                      return ret;
00703                      }
00704         }
00705 
00706         /***********************************************************************
00707         
00708                 Clear everything to null: the decode storage is reset, the
00709                 port becomes InvalidPort, and every component becomes null.
00710         ***********************************************************************/
00711 
00712         void reset()
00713         {
00714                 decoded.reset();
00715                 port = InvalidPort;
00716                 host = path = query = scheme = userinfo = fragment = null;
00717         }
00718 
00719         /***********************************************************************
00720                 Parse the given uri string, replacing the current content.
00721                 reset() is applied first, so components left over from an
00722                 earlier parse (or setter) cannot leak into the new result.
00723         ***********************************************************************/
00724 
00725         MutableUri parse (char[] uri)
00726         {       
00727                 reset();
00728                 super.parse (uri);
00729                 return this;
00730         }
00731         /***********************************************************************
00732                 
00733                 Set the Uri scheme; returns this to allow call chaining
00734 
00735         ***********************************************************************/
00736 
00737         MutableUri setScheme (char[] scheme)
00738         {
00739                 this.scheme = scheme;
00740                 return this;
00741         }
00742 
00743         /***********************************************************************
00744         
00745                 Set the Uri host; returns this to allow call chaining
00746 
00747         ***********************************************************************/
00748 
00749         MutableUri setHost (char[] host)
00750         {
00751                 this.host = host;
00752                 return this;
00753         }
00754 
00755         /***********************************************************************
00756         
00757                 Set the Uri port; returns this to allow call chaining
00758 
00759         ***********************************************************************/
00760 
00761         MutableUri setPort (int port)
00762         {
00763                 this.port = port;
00764                 return this;
00765         }
00766 
00767         /***********************************************************************
00768         
00769                 Set the Uri userinfo; returns this to allow call chaining
00770 
00771         ***********************************************************************/
00772 
00773         MutableUri setUserInfo(char[] userinfo)
00774         {
00775                 this.userinfo = userinfo;
00776                 return this;
00777         }
00778 
00779         /***********************************************************************
00780         
00781                 Set the Uri query; returns this to allow call chaining
00782 
00783         ***********************************************************************/
00784 
00785         MutableUri setQuery (char[] query)
00786         {
00787                 this.query = query;
00788                 return this;
00789         }
00790 
00791         /***********************************************************************
00792         
00793                 Extend the Uri query: a non-empty tail is appended after
00794                 an "&", or becomes the query when there was none. Returns
00795         the resulting query *******************************************/
00796 
00797         char[] extendQuery (char[] tail)
00798         {
00799                 if (tail.length)
00800                     if (query.length)
00801                         query = query ~ "&" ~ tail;
00802                     else
00803                        query = tail;
00804                 return query;
00805         }
00806 
00807         /***********************************************************************
00808         
00809                 Set the Uri path; returns this to allow call chaining
00810 
00811         ***********************************************************************/
00812 
00813         MutableUri setPath (char[] path)
00814         {
00815                 this.path = path;
00816                 return this;
00817         }
00818 
00819         /***********************************************************************
00820         
00821                 Set the Uri fragment; returns this to allow call chaining
00822 
00823         ***********************************************************************/
00824 
00825         MutableUri setFragment (char[] fragment)
00826         {
00827                 this.fragment = fragment;
00828                 return this;
00829         }
00830 }
00831 
00832 

Generated on Fri May 27 18:11:57 2005 for Mango by  doxygen 1.4.0