Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

Uri.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Uri.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032         
00033         @version        Initial version, April 2004      
00034         @author         Kris
00035 
00036 
00037 *******************************************************************************/
00038 
00039 module mango.io.Uri;
00040 
00041 private import  mango.io.Buffer,
00042                 mango.io.Exception;
00043 
00044 private import  mango.format.Int;
00045 
00046 private import  mango.utils.Text,
00047                 mango.utils.HeapSlice;
00048 
00049 private import  mango.io.model.IWriter;
00050 
/*******************************************************************************

        Binding to the C runtime's memchr. Uri.decode() calls it to check
        whether a string contains a '%' before doing any decoding work.

*******************************************************************************/

extern (C) char* memchr (char* haystack, char needle, uint count);
00056 
00057 /*******************************************************************************
00058 
00059         Implements an RFC 2396 compliant URI specification. See 
00060         <A HREF="http://ftp.ics.uci.edu/pub/ietf/uri/rfc2396.txt">this page</A>
00061         for more information. 
00062 
00063         The implementation fails the spec on two counts: it doesn't insist
00064         on a scheme being present in the Uri, and it doesn't implement the
00065         "Relative References" support noted in section 5.2. Note that IRI
00066         support can be added by assuming each of userinfo, path, query, and 
00067         fragment are UTF-8 encoded 
00068         (see <A HREF="http://www.w3.org/2001/Talks/0912-IUC-IRI/paper.html">
00069         this page</A> for further details).
00070 
00071         Use the MutableUri derivative where you need to alter specific uri
00072         attributes. 
00073 
00074 *******************************************************************************/
00075 
00076 class Uri : IWritable
00077 {
00078         /***********************************************************************
00079         
00080                 Initialize the Uri character maps and so on
00081 
00082         ***********************************************************************/
00083 
00084         static this ()
00085         {
00086                 error = new IOException ("Invalid URI specification");
00087 
00088                 // Map known generic schemes to their default port. Specify
00089                 // InvalidPort for those schemes that don't use ports. Note
00090                 // that a port value of zero is not supported ...
00091                 foreach (SchemePort sp; schemePorts)
00092                          genericSchemes[sp.name] = sp.port;
00093                 genericSchemes.rehash;
00094 
00095                 // load the character map with valid symbols
00096                 for (int i='a'; i <= 'z'; ++i)  
00097                      map[i] = IncGeneric;
00098 
00099                 for (int i='A'; i <= 'Z'; ++i)  
00100                      map[i] = IncGeneric;
00101 
00102                 for (int i='0'; i<='9'; ++i)  
00103                      map[i] = IncGeneric;
00104 
00105                 // exclude these from parsing elements
00106                 map[':'] |= ExcScheme;
00107                 map['/'] |= ExcScheme | ExcAuthority;
00108                 map['?'] |= ExcScheme | ExcAuthority | ExcPath;
00109                 map['#'] |= ExcScheme | ExcAuthority | ExcPath | ExcQuery;
00110 
00111                 // include these as common symbols
00112                 map['-'] |= IncUser | IncQuery;
00113                 map['_'] |= IncUser | IncQuery;
00114                 map['.'] |= IncUser | IncQuery;
00115                 map['!'] |= IncUser | IncQuery;
00116                 map['~'] |= IncUser | IncQuery;
00117                 map['*'] |= IncUser | IncQuery;
00118                 map['\''] |= IncUser | IncQuery;
00119                 map['('] |= IncUser | IncQuery;
00120                 map[')'] |= IncUser | IncQuery;
00121 
00122                 // include these as scheme symbols
00123                 map['+'] |= IncScheme;
00124                 map['-'] |= IncScheme;
00125                 map['.'] |= IncScheme;
00126 
00127                 // include these as userinfo symbols
00128                 map[';'] |= IncUser;
00129                 map[':'] |= IncUser;
00130                 map['&'] |= IncUser;
00131                 map['='] |= IncUser;
00132                 map['+'] |= IncUser;
00133                 map['$'] |= IncUser;
00134                 map[','] |= IncUser;
00135 
00136                 // include these as path symbols
00137                 map['/'] |= IncPath;
00138                 map[';'] |= IncPath;
00139                 map[':'] |= IncPath;
00140                 map['@'] |= IncPath;
00141                 map['&'] |= IncPath;
00142                 map['='] |= IncPath;
00143                 map['+'] |= IncPath;
00144                 map['$'] |= IncPath;
00145                 map[','] |= IncPath;
00146 
00147                 // include these as query symbols
00148                 map[';'] |= IncQuery;
00149                 map['/'] |= IncQuery;
00150                 map['?'] |= IncQuery;
00151                 map[':'] |= IncQuery;
00152                 map['@'] |= IncQuery;
00153                 map['&'] |= IncQuery;
00154                 map['='] |= IncQuery;
00155                 map['+'] |= IncQuery;
00156                 map['$'] |= IncQuery;
00157                 map[','] |= IncQuery;
00158         }
00159         
00160         /***********************************************************************
00161         
00162                 Construct a Uri from the provided character string
00163 
00164         ***********************************************************************/
00165 
00166         this (char[] uri)
00167         {
00168                 this();
00169                 parse (uri);
00170         }
00171 
00172         /***********************************************************************
00173         
00174                 Return the default port for the given scheme. InvalidPort
00175                 is returned if the scheme is unknown, or does not accept
00176                 a port.
00177 
00178         ***********************************************************************/
00179 
00180         final static int getDefaultPort (char[] scheme)
00181         {
00182                 int port = genericSchemes [scheme]; 
00183                 if (! port)
00184                       port = InvalidPort;
00185                 return port;
00186         }
00187 
00188         /***********************************************************************
00189         
00190                 Return the parsed scheme, or null if the scheme was not
00191                 specified
00192 
00193         ***********************************************************************/
00194 
00195         char[] getScheme()
00196         {
00197                 return scheme;
00198         }
00199 
00200         /***********************************************************************
00201         
00202                 Return the parsed host, or null if the host was not
00203                 specified
00204 
00205         ***********************************************************************/
00206 
00207         char[] getHost()
00208         {
00209                 return host;
00210         }
00211 
00212         /***********************************************************************
00213         
00214                 Return the parsed port number, or InvalidPort if the port
00215                 was not provided.
00216 
00217         ***********************************************************************/
00218 
00219         int getPort()
00220         {
00221                 return port;
00222         }
00223 
00224         /***********************************************************************
00225         
00226                 Return a valid port number by performing a lookup on the 
00227                 known schemes if the port was not explicitly specified.
00228 
00229         ***********************************************************************/
00230 
00231         int getValidPort()
00232         {
00233                 if (port == InvalidPort)
00234                     return getDefaultPort (scheme);
00235                 return port;
00236         }
00237 
00238         /***********************************************************************
00239         
00240                 Return the parsed userinfo, or null if userinfo was not 
00241                 provided.
00242 
00243         ***********************************************************************/
00244 
00245         char[] getUserInfo()
00246         {
00247                 return userinfo;
00248         }
00249 
00250         /***********************************************************************
00251         
00252                 Return the parsed path, or null if the path was not 
00253                 provided.
00254 
00255         ***********************************************************************/
00256 
00257         char[] getPath()
00258         {
00259                 return path;
00260         }
00261 
00262         /***********************************************************************
00263         
00264                 Return the parsed query, or null if a query was not 
00265                 provided.
00266 
00267         ***********************************************************************/
00268 
00269         char[] getQuery()
00270         {
00271                 return query;
00272         }
00273 
00274         /***********************************************************************
00275         
00276                 Return the parsed fragment, or null if a fragment was not 
00277                 provided.
00278 
00279         ***********************************************************************/
00280 
00281         char[] getFragment()
00282         {
00283                 return fragment;
00284         }
00285 
00286         /***********************************************************************
00287         
00288                 return whether or not the Uri scheme is considered generic.
00289 
00290         ***********************************************************************/
00291 
00292         bool isGeneric ()
00293         {
00294                 return genericSchemes [scheme] != 0;
00295         }
00296 
00297         /***********************************************************************
00298         
00299                 Write the content of this Uri to the provided buffer. The
00300                 output is constructed per RFC 2396
00301 
00302         ***********************************************************************/
00303 
00304         IBuffer write (IBuffer buf)
00305         {
00306                 if (scheme.length)
00307                     buf.append (scheme).append(":");
00308 
00309 
00310                 if (userinfo.length || host.length || port != InvalidPort)
00311                    {
00312                    buf.append ("//");
00313 
00314                    if (userinfo.length)
00315                        encode (buf, userinfo, IncUser).append("@");

                   if (host.length)
                       buf.append (host);

                   if (port != InvalidPort && port != getDefaultPort(scheme))
                      {
                      char[4] tmp;
                      buf.append(":").append(Int.format (tmp, port));
00316                       }
00317                    }
00318 
00319                 if (path.length)
00320                     encode (buf, path, IncPath);
00321 
00322                 if (query.length)
00323                    {
00324                    buf.append ("?");
00325                    encode (buf, query, IncQuery);
00326                    }
00327 
00328                 if (fragment.length)
00329                    {
00330                    buf.append ("#");
00331                    encode (buf, fragment, IncQuery);
00332                    }
00333 
00334                 return buf;
00335         }
00336 
00337         /***********************************************************************
00338         
00339                 Write the content of this Uri to the provided writer. The
00340                 output is constructed per RFC 2396
00341 
00342         ***********************************************************************/
00343 
00344         void write (IWriter writer)
00345         {
00346                 write (writer.getBuffer);
00347         }
00348 
00349         /***********************************************************************
00350         
00351                 Convert this Uri to a character string
00352 
00353         ***********************************************************************/
00354 
00355         override char[] toString ()
00356         {
00357                 return write (new GrowableBuffer(1024)).toString;
00358         }
00359 
00360         /***********************************************************************
00361         
00362                 Decode a character string with potential %hex values in it.
00363                 The decoded strings are placed into a thread-safe expanding
00364                 buffer, and a slice of it is returned to the requestor.
00365 
00366         ***********************************************************************/
00367 
00368         char[] decode (char[] s)
00369         {
00370                 int length = s.length;
00371 
00372                 // take a peek first, to see if there's work to do
00373                 if (length && memchr (s, '%', length))
00374                    {
00375                    char* p;
00376                    int   j;
00377                         
00378                    // ensure we have enough decoding space available
00379                    p = cast(char*) decoded.expand (length);
00380 
00381                    // scan string, stripping % encodings as we go
00382                    for (int i; i < length; ++i, ++j, ++p)
00383                        {
00384                        int c = s[i];
00385                        if (c == '%' && (i+2) < length)
00386                           {
00387                           c = Int.parse (s[i+1..i+3], Int.Radix.Hexadecimal);
00388                           i += 2;
00389                           }
00390 
00391                        *p = c;
00392                        }
00393                    // return a slice from the decoded input
00394                    return cast(char[]) decoded.slice (j);
00395                    }
00396 
00397                 // return original content
00398                 return s;
00399         }   
00400 
00401         /***********************************************************************
00402         
00403                 Encode uri characters into an output buffer, such that
00404                 reserved chars are converted into their %hex version.
00405 
00406         ***********************************************************************/
00407 
00408         private static IBuffer encode (IBuffer buf, char[] s, int flags)
00409         {
00410                 char[3] hex;
00411                 int     mark;
00412 
00413                 hex[0] = '%';
00414                 foreach (int i, char c; s)
00415                         {
00416                         if (! (map[c] & flags))
00417                            {
00418                            buf.append (s[mark..i]);
00419                            mark = i+1;
00420                                 
00421                            hex[1] = hexDigits [(c >> 4) & 0x0f];
00422                            hex[2] = hexDigits [c & 0x0f];
00423                            buf.append (hex);
00424                            }
00425                         }
00426 
00427                 // add trailing section
00428                 if (mark < s.length)
00429                     buf.append (s[mark..s.length]);
00430 
00431                 return buf;
00432         }
00433 
00434         /***********************************************************************
00435         
00436                 This should not be exposed outside of this module!
00437 
00438         ***********************************************************************/
00439 
00440         private this ()
00441         {
00442                 port = InvalidPort;
00443                 decoded = new HeapSlice (256);
00444         }
00445 
00446         /***********************************************************************
00447         
00448                 Parsing is performed according to RFC 2396
00449                 
00450                 @code
00451                   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
00452                    12            3  4          5       6  7        8 9
00453                     
00454                 2 isolates scheme
00455                 4 isolates authority
00456                 5 isolates path
00457                 7 isolates query
00458                 9 isolates fragment
00459                 @endcode
00460 
00461                 This was originally a state-machine; it turned out to be a 
00462                 lot faster (~40%) when unwound like this instead.
00463                 
00464         ***********************************************************************/
00465 
00466         private void parse (char[] uri)
00467         {
00468                 char    c;
00469                 int     i, 
00470                         mark, 
00471                         len = uri.length;
00472 
00473                 // isolate scheme (note that it's OK to not specify a scheme)
00474                 for (i=0; i < len && !(map[c = uri[i]] & ExcScheme); ++i) {}
00475                 if (c == ':')
00476                    {
00477                    scheme = uri[mark..i];
00478                    Text.tolower (scheme);
00479                    mark = i + 1;
00480                    }
00481 
00482                 // isolate authority
00483                 if (mark < len-1  &&  uri[mark] == '/'  &&  uri[mark+1] == '/')
00484                    {
00485                    for (mark+=2, i=mark; i < len && !(map[uri[i]] & ExcAuthority); ++i) {}
00486                    parseAuthority (uri[mark..i]); 
00487                    mark = i;
00488                    }
00489 
00490                 // isolate path
00491                 for (i=mark; i < len && !(map[uri[i]] & ExcPath); ++i) {}
00492                 path = decode (uri[mark..i]);
00493                 mark = i;
00494 
00495                 // isolate query
00496                 if (mark < len && uri[mark] == '?')
00497                    {
00498                    for (++mark, i=mark; i < len && uri[i] != '#'; ++i) {}
00499                    query = decode (uri[mark..i]);
00500                    mark = i;
00501                    }
00502 
00503                 // isolate fragment
00504                 if (mark < len && uri[mark] == '#')
00505                     fragment = decode (uri[mark+1..len]);
00506         }
00507         
00508         /***********************************************************************
00509         
00510                 Authority is the section after the scheme, but before the 
00511                 path, query or fragment; it typically represents a host.
00512                
00513                 @code
00514                     ^(([^@]*)@?)([^:]*)?(:(.*))?
00515                      12         3       4 5
00516                   
00517                 2 isolates userinfo
00518                 3 isolates host
00519                 5 isolates port
00520                 @endcode
00521 
00522         ***********************************************************************/
00523 
00524         private void parseAuthority (char[] auth)
00525         {
00526                 int     mark,
00527                         len = auth.length;
00528 
00529                 // get userinfo: (([^@]*)@?)
00530                 foreach (int i, char c; auth)
00531                          if (c == '@')
00532                             {
00533                             userinfo = decode (auth[0..i]);
00534                             mark = i + 1;
00535                             break;
00536                             }
00537 
00538                 // get port: (:(.*))?
00539                 for (int i=mark; i < len; ++i)
00540                      if (auth [i] == ':')
00541                         {
00542                         port = Int.parse (auth [i+1..len]);
00543                         len = i;
00544                         break;
00545                         }
00546 
00547                 // get host: ([^:]*)?
00548                 host = auth [mark..len];
00549         }
00550 
00551 
00552         /***********************************************************************
00553         
00554                 Class attributes
00555 
00556         ***********************************************************************/
00557 
00558         const int               InvalidPort = -1;
00559 
00560         private int             port;
00561         private char[]          host,
00562                                 path,
00563                                 query,
00564                                 scheme,
00565                                 userinfo,
00566                                 fragment;
00567         private HeapSlice       decoded;
00568 
00569         private static ubyte    map[256];
00570 
00571                     
00572         private static short[char[]] genericSchemes;
00573 
00574         private static const char[] hexDigits = "0123456789abcdef";
00575 
00576         private static IOException error;
00577 
00578         private enum    {
00579                         ExcScheme       = 0x01, 
00580                         ExcAuthority    = 0x02, 
00581                         ExcPath         = 0x04, 
00582                         ExcQuery        = 0x08, 
00583                         IncUser         = 0x10, 
00584                         IncPath         = 0x20,
00585                         IncQuery        = 0x40,
00586                         IncScheme       = 0x80,
00587                         IncGeneric      = IncScheme | IncUser | IncPath | IncQuery
00588                         };
00589 
00590         private struct SchemePort
00591         {
00592                         char[]  name;
00593                         int     port;
00594         }
00595 
00596         private static  const SchemePort[] schemePorts =
00597                         [
00598                         {"coffee",      80},
00599                         {"file",        InvalidPort},
00600                         {"ftp",         21},
00601                         {"gopher",      70},
00602                         {"hnews",       80},
00603                         {"http",        80},
00604                         {"http-ng",     80},
00605                         {"https",       443},
00606                         {"imap",        143},
00607                         {"irc",         194}, 
00608                         {"ldap",        389},
00609                         {"news",        119},
00610                         {"nfs",         2049}, 
00611                         {"nntp",        119},
00612                         {"pop",         110}, 
00613                         {"prospero",    1525},
00614                         {"rwhois",      4321},
00615                         {"sip",         InvalidPort},
00616                         {"sips",        InvalidPort},
00617                         {"sipt",        InvalidPort},
00618                         {"sipu",        InvalidPort},
00619                         {"shttp",       80},
00620                         {"smtp",        25},
00621                         {"snews",       563},
00622                         {"telnet",      23},
00623                         {"vemmi",       575},
00624                         {"videotex",    516},
00625                         {"wais",        210},
00626                         {"whois",       43},
00627                         {"whois++",     43},
00628                         ];
00629 }
00630 
00631 
/*******************************************************************************

        A Uri whose components can be changed after construction. Every
        setter returns the instance itself, so calls can be chained.

*******************************************************************************/

class MutableUri : Uri
{
        /***********************************************************************
        
                Make a Uri with no components set.

        ***********************************************************************/

        this ()
        {
                super ();
        }

        /***********************************************************************
        
                Make a Uri by parsing the given text.

        ***********************************************************************/

        this (char[] uri)
        {
                super (uri);
        }

        /***********************************************************************
        
                Make a Uri from individual components; the query may be
                left out. The arguments are stored as given, without
                parsing or decoding.
                
        ***********************************************************************/

        this (char[] scheme, char[] host, char[] path, char[] query = null)
        {
                super ();

                this.scheme = scheme;
                this.host   = host;
                this.path   = path;
                this.query  = query;
        }

        /***********************************************************************
        
                Make a MutableUri holding the same components as the given
                Uri (which may be an immutable one). The component arrays
                are shared with the source rather than duplicated.

        ***********************************************************************/

        static MutableUri clone (Uri uri)
        {
                MutableUri copy = new MutableUri (uri.scheme, uri.host,
                                                  uri.path, uri.query);
                copy.userinfo = uri.userinfo;
                copy.fragment = uri.fragment;
                copy.port     = uri.port;
                return copy;
        }

        /***********************************************************************
        
                Return this Uri to the empty state: the decode storage is
                reset, the port becomes InvalidPort and every text
                component becomes null.

        ***********************************************************************/

        void reset()
        {
                decoded.reset ();
                port     = InvalidPort;
                fragment = null;
                userinfo = null;
                scheme   = null;
                query    = null;
                path     = null;
                host     = null;
        }

        /***********************************************************************
        
                Parse the given text into this Uri. reset() is not called
                first, so components that are absent from the text keep
                whatever value they had before.

        ***********************************************************************/

        MutableUri parse (char[] uri)
        {
                super.parse (uri);
                return this;
        }

        /***********************************************************************
                
                Replace the scheme.

        ***********************************************************************/

        MutableUri setScheme (char[] scheme)
        {
                this.scheme = scheme;
                return this;
        }

        /***********************************************************************
        
                Replace the host.

        ***********************************************************************/

        MutableUri setHost (char[] host)
        {
                this.host = host;
                return this;
        }

        /***********************************************************************
        
                Replace the port.

        ***********************************************************************/

        MutableUri setPort (int port)
        {
                this.port = port;
                return this;
        }

        /***********************************************************************
        
                Replace the userinfo.

        ***********************************************************************/

        MutableUri setUserInfo(char[] userinfo)
        {
                this.userinfo = userinfo;
                return this;
        }

        /***********************************************************************
        
                Replace the query.

        ***********************************************************************/

        MutableUri setQuery (char[] query)
        {
                this.query = query;
                return this;
        }

        /***********************************************************************
        
                Replace the path.

        ***********************************************************************/

        MutableUri setPath (char[] path)
        {
                this.path = path;
                return this;
        }

        /***********************************************************************
        
                Replace the fragment.

        ***********************************************************************/

        MutableUri setFragment (char[] fragment)
        {
                this.fragment = fragment;
                return this;
        }
}
00807 
00808 
00809 

Generated on Sun Mar 6 00:31:00 2005 for Mango by doxygen 1.3.6