Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

Uri.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Uri.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032         
00033         @version        Initial version, April 2004      
00034         @author         Kris
00035 
00036 
00037 *******************************************************************************/
00038 
00039 module mango.io.Uri;
00040 
00041 private import  mango.io.Exception;
00042 
00043 private import  mango.convert.Integer;
00044 
00045 private import  mango.utils.HeapSlice;
00046 
00047 private import  mango.io.model.IWriter;
00048 
00049 /*******************************************************************************
00050 
00051 *******************************************************************************/
00052 
// Binding for the C library's memchr(): scans the first 'count' bytes at
// 'ptr' for 'value' and returns a pointer to the first match, or null if
// there is none. The prototype mirrors the C declaration
//
//      void* memchr (const void* ptr, int value, size_t count);
//
// so that the argument widths are right on every target (the byte count is
// a size_t, not a 32-bit uint). Callers may still pass a char[] and a char:
// both convert implicitly to void* and int respectively.
extern (C) void* memchr (void*, int, size_t);
00054 
00055 /*******************************************************************************
00056 
00057         Implements an RFC 2396 compliant URI specification. See 
00058         <A HREF="http://ftp.ics.uci.edu/pub/ietf/uri/rfc2396.txt">this page</A>
00059         for more information. 
00060 
00061         The implementation fails the spec on two counts: it doesn't insist
00062         on a scheme being present in the Uri, and it doesn't implement the
00063         "Relative References" support noted in section 5.2. Note that IRI
00064         support can be added by assuming each of userinfo, path, query, and 
00065         fragment are UTF-8 encoded 
00066         (see <A HREF="http://www.w3.org/2001/Talks/0912-IUC-IRI/paper.html">
00067         this page</A> for further details).
00068 
00069         Use the MutableUri derivative where you need to alter specific uri
00070         attributes. 
00071 
00072 *******************************************************************************/
00073 
class Uri : IWritable
{
        /// Port value meaning "no port given" / "scheme has no port".
        public const int        InvalidPort = -1;

        // parsed components; each one is a slice of either the text handed
        // to parse(), or of the 'decoded' buffer when %hex escapes had to
        // be removed
        private int             port;
        private char[]          host,
                                path,
                                query,
                                scheme,
                                userinfo,
                                fragment;

        // backing store for the output of decode()
        private HeapSlice       decoded;

        // per-character classification flags (see the enum below),
        // populated once by the static constructor
        private static ubyte[256] map;

        // default port for each known generic scheme, keyed by scheme name
        private static short[char[]] genericSchemes;

        private static const char[] hexDigits = "0123456789abcdef";

        private static IOException error;

        // Exc* flags mark the characters that terminate a component while
        // parsing; Inc* flags mark the characters that may be emitted
        // without %hex escaping when a component is written
        private enum    {
                        ExcScheme       = 0x01,
                        ExcAuthority    = 0x02,
                        ExcPath         = 0x04,
                        ExcQuery        = 0x08,
                        IncUser         = 0x10,
                        IncPath         = 0x20,
                        IncQuery        = 0x40,
                        IncScheme       = 0x80,
                        IncGeneric      = IncScheme | IncUser | IncPath | IncQuery
                        };

        private struct SchemePort
        {
                        char[]  name;
                        short   port;
        }

        private static  const SchemePort[] schemePorts =
                        [
                        {"coffee",      80},
                        {"file",        InvalidPort},
                        {"ftp",         21},
                        {"gopher",      70},
                        {"hnews",       80},
                        {"http",        80},
                        {"http-ng",     80},
                        {"https",       443},
                        {"imap",        143},
                        {"irc",         194},
                        {"ldap",        389},
                        {"news",        119},
                        {"nfs",         2049},
                        {"nntp",        119},
                        {"pop",         110},
                        {"prospero",    1525},
                        {"rwhois",      4321},
                        {"sip",         InvalidPort},
                        {"sips",        InvalidPort},
                        {"sipt",        InvalidPort},
                        {"sipu",        InvalidPort},
                        {"shttp",       80},
                        {"smtp",        25},
                        {"snews",       563},
                        {"telnet",      23},
                        {"vemmi",       575},
                        {"videotex",    516},
                        {"wais",        210},
                        {"whois",       43},
                        {"whois++",     43},
                        ];


        /***********************************************************************

                OR the given flags into the map entry of every character
                listed in 'symbols'. Used only while building the map.

        ***********************************************************************/

        private static void include (char[] symbols, int flags)
        {
                foreach (char c; symbols)
                         map[c] |= flags;
        }

        /***********************************************************************

                Module initialisation: builds the scheme -> default port
                table and the character classification map.

        ***********************************************************************/

        static this ()
        {
                error = new IOException ("Invalid URI specification");

                // Map known generic schemes to their default port. Schemes
                // that don't use ports carry InvalidPort. Note that a port
                // value of zero is not supported ...
                foreach (SchemePort entry; schemePorts)
                         genericSchemes[entry.name] = entry.port;
                genericSchemes.rehash;

                // alphanumerics are legal in every component
                for (char c = 'a'; c <= 'z'; ++c)
                     map[c] = IncGeneric;

                for (char c = 'A'; c <= 'Z'; ++c)
                     map[c] = IncGeneric;

                for (char c = '0'; c <= '9'; ++c)
                     map[c] = IncGeneric;

                // characters that terminate a component during parsing
                map[':'] |= ExcScheme;
                map['/'] |= ExcScheme | ExcAuthority;
                map['?'] |= ExcScheme | ExcAuthority | ExcPath;
                map['#'] |= ExcScheme | ExcAuthority | ExcPath | ExcQuery;

                // RFC 2396 "unreserved" marks. They are legal in path
                // segments too, so they must carry IncPath: without it,
                // writing "/index.html" would yield "/index%2ehtml".
                include ("-_.!~*'()", IncUser | IncPath | IncQuery);

                // additional scheme symbols
                include ("+-.", IncScheme);

                // additional userinfo symbols
                include (";:&=+$,", IncUser);

                // additional path symbols
                include ("/;:@&=+$,", IncPath);

                // additional query (and fragment) symbols
                include (";/?:@&=+$,", IncQuery);
        }

        /***********************************************************************

                Construct a Uri by parsing the provided character string.

                The components returned by the getters may be slices of
                'uri', and a scheme present in 'uri' is lowercased in
                place (see parse), so the caller's array must be writable
                and must outlive this instance.

        ***********************************************************************/

        this (char[] uri)
        {
                this();
                parse (uri);
        }

        /***********************************************************************

                Return the default port for the given scheme. InvalidPort
                is returned if the scheme is unknown, or does not accept
                a port. The lookup is an exact match on the scheme name.

        ***********************************************************************/

        final static int getDefaultPort (char[] scheme)
        {
                short* known = scheme in genericSchemes;
                return (known is null) ? InvalidPort : *known;
        }

        /***********************************************************************

                Return the parsed scheme (lowercased), or null if the
                scheme was not specified

        ***********************************************************************/

        char[] getScheme()
        {
                return scheme;
        }

        /***********************************************************************

                Return the parsed host, or null if the host was not
                specified

        ***********************************************************************/

        char[] getHost()
        {
                return host;
        }

        /***********************************************************************

                Return the parsed port number, or InvalidPort if the port
                was not provided.

        ***********************************************************************/

        int getPort()
        {
                return port;
        }

        /***********************************************************************

                Return the explicit port if there is one; otherwise fall
                back to the default port of the scheme (which may itself
                be InvalidPort).

        ***********************************************************************/

        int getValidPort()
        {
                return (port == InvalidPort) ? getDefaultPort (scheme) : port;
        }

        /***********************************************************************

                Return the parsed userinfo, or null if userinfo was not
                provided.

        ***********************************************************************/

        char[] getUserInfo()
        {
                return userinfo;
        }

        /***********************************************************************

                Return the parsed path, or null if the path was not
                provided.

        ***********************************************************************/

        char[] getPath()
        {
                return path;
        }

        /***********************************************************************

                Return the parsed query, or null if a query was not
                provided.

        ***********************************************************************/

        char[] getQuery()
        {
                return query;
        }

        /***********************************************************************

                Return the parsed fragment, or null if a fragment was not
                provided.

        ***********************************************************************/

        char[] getFragment()
        {
                return fragment;
        }

        /***********************************************************************

                Return whether or not the Uri scheme is considered generic,
                i.e. whether it is one of the schemes listed in schemePorts.

        ***********************************************************************/

        bool isGeneric ()
        {
                return cast(bool) ((scheme in genericSchemes) !is null);
        }

        /***********************************************************************

                Write the content of this Uri to the provided buffer, in
                the form

                    [scheme:][//[userinfo@][host][:port]][path][?query][#fragment]

                Userinfo, path, query and fragment are %hex escaped where
                necessary; scheme and host are written verbatim. The port
                is omitted when it equals the default for the scheme.
                Returns 'buf' to permit call chaining.

        ***********************************************************************/

        IBuffer write (IBuffer buf)
        {
                if (scheme.length)
                    buf.append (scheme).append (":");

                // an authority section exists if any of its parts do
                if (userinfo.length || host.length || port != InvalidPort)
                   {
                   buf.append ("//");

                   if (userinfo.length)
                       encode (buf, userinfo, IncUser).append ("@");

                   if (host.length)
                       buf.append (host);

                   if (port != InvalidPort && port != getDefaultPort (scheme))
                      {
                      // large enough for any int; a four char buffer
                      // cannot hold a five digit port such as 65535
                      char[16] tmp;
                      buf.append (":").append (Integer.format (tmp, port));
                      }
                   }

                if (path.length)
                    encode (buf, path, IncPath);

                if (query.length)
                   {
                   buf.append ("?");
                   encode (buf, query, IncQuery);
                   }

                // fragments share the query character set
                if (fragment.length)
                   {
                   buf.append ("#");
                   encode (buf, fragment, IncQuery);
                   }

                return buf;
        }

        /***********************************************************************

                Write the content of this Uri to the buffer owned by the
                provided writer. See write (IBuffer).

        ***********************************************************************/

        void write (IWriter writer)
        {
                write (writer.getBuffer);
        }

        /***********************************************************************

                Decode a character string with potential %hex values in it.

                If 's' contains no '%' it is returned as is. Otherwise the
                decoded text is written into this instance's 'decoded'
                buffer and a slice of that buffer is returned; such a slice
                is only as long-lived as the buffer content (MutableUri
                reset() rewinds it).

                A '%' that is not followed by two hex digits is not a valid
                escape, and is copied through literally.

        ***********************************************************************/

        char[] decode (char[] s)
        {
                // numeric value of a hex digit, or -1 if 'c' is not one
                static int hexValue (char c)
                {
                        if (c >= '0' && c <= '9')
                            return c - '0';
                        if (c >= 'a' && c <= 'f')
                            return c - 'a' + 10;
                        if (c >= 'A' && c <= 'F')
                            return c - 'A' + 10;
                        return -1;
                }

                int len = s.length;

                // take a peek first, to see if there's any work to do
                if (len == 0 || memchr (s, '%', len) is null)
                    return s;

                // decoding never grows the text, so 'len' bytes suffice
                char* dst   = cast(char*) decoded.expand (len);
                int   count = 0;

                for (int i = 0; i < len; ++i, ++count, ++dst)
                    {
                    char c = s[i];

                    if (c == '%' && (i + 2) < len)
                       {
                       int hi = hexValue (s[i+1]);
                       int lo = hexValue (s[i+2]);

                       // only consume the pair when it is a real escape
                       if (hi >= 0 && lo >= 0)
                          {
                          c = cast(char) (hi * 16 + lo);
                          i += 2;
                          }
                       }

                    *dst = c;
                    }

                // return a slice of the decoded output
                return cast(char[]) decoded.slice (count);
        }

        /***********************************************************************

                Append 's' to the buffer, replacing each character whose
                map entry has none of the given Inc* 'flags' set with its
                %hex escape (lowercase hex digits). Returns 'buf'.

        ***********************************************************************/

        private static IBuffer encode (IBuffer buf, char[] s, int flags)
        {
                char[3] escape;
                int     start = 0;      // first char not yet written

                escape[0] = '%';
                for (int i = 0; i < s.length; ++i)
                    {
                    char c = s[i];
                    if (map[c] & flags)
                        continue;

                    // flush the clean run preceding this character
                    buf.append (s[start..i]);
                    start = i + 1;

                    escape[1] = hexDigits [(c >> 4) & 0x0f];
                    escape[2] = hexDigits [c & 0x0f];
                    buf.append (escape);
                    }

                // add trailing section
                if (start < s.length)
                    buf.append (s[start..s.length]);

                return buf;
        }

        /***********************************************************************

                Create an empty Uri. This should not be exposed outside
                of this module!

        ***********************************************************************/

        private this ()
        {
                port = InvalidPort;
                decoded = new HeapSlice (256);
        }

        /***********************************************************************

                Parsing is performed according to RFC 2396

                @code
                  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
                   12            3  4          5       6  7        8 9

                2 isolates scheme
                4 isolates authority
                5 isolates path
                7 isolates query
                9 isolates fragment
                @endcode

                This was originally a state-machine; it turned out to be a
                lot faster (~40%) when unwound like this instead.

                Components absent from 'uri' are left untouched. Note that
                the scheme is lowercased in place, which modifies the
                caller's array.

        ***********************************************************************/

        private void parse (char[] uri)
        {
                int     len  = uri.length;
                int     mark = 0;
                int     i    = 0;

                // isolate scheme (note that it's OK to not specify a scheme):
                // it is present only when the first delimiter met is ':'
                while (i < len && !(map[uri[i]] & ExcScheme))
                       ++i;

                if (i < len && uri[i] == ':')
                   {
                   scheme = uri [mark..i];
                   toLower (scheme);
                   mark = i + 1;
                   }

                // isolate authority, introduced by "//"
                if (mark < len-1  &&  uri[mark] == '/'  &&  uri[mark+1] == '/')
                   {
                   mark += 2;
                   for (i = mark; i < len && !(map[uri[i]] & ExcAuthority); ++i) {}
                   parseAuthority (uri[mark..i]);
                   mark = i;
                   }

                // isolate path: everything up to '?' or '#'
                for (i = mark; i < len && !(map[uri[i]] & ExcPath); ++i) {}
                path = decode (uri[mark..i]);
                mark = i;

                // isolate query: everything between '?' and '#'
                if (mark < len && uri[mark] == '?')
                   {
                   ++mark;
                   for (i = mark; i < len && uri[i] != '#'; ++i) {}
                   query = decode (uri[mark..i]);
                   mark = i;
                   }

                // isolate fragment: the remainder after '#'
                if (mark < len && uri[mark] == '#')
                    fragment = decode (uri[mark+1..len]);
        }

        /***********************************************************************

                Authority is the section after the scheme, but before the
                path, query or fragment; it typically represents a host.

                @code
                    ^(([^@]*)@?)([^:]*)?(:(.*))?
                     12         3       4 5

                2 isolates userinfo
                3 isolates host
                5 isolates port
                @endcode

                The port is searched for after the userinfo only, so a
                ':' within "user:password@" is not taken for one.

        ***********************************************************************/

        private void parseAuthority (char[] auth)
        {
                int     start = 0,
                        end   = auth.length;

                // get userinfo: (([^@]*)@?)
                for (int i = 0; i < end; ++i)
                     if (auth [i] == '@')
                        {
                        userinfo = decode (auth [0..i]);
                        start = i + 1;
                        break;
                        }

                // get port: (:(.*))?
                for (int i = start; i < end; ++i)
                     if (auth [i] == ':')
                        {
                        port = cast(int) Integer.parse (auth [i+1..end]);
                        end = i;
                        break;
                        }

                // get host: ([^:]*)? -- whatever lies between the two
                host = auth [start..end];
        }

        /**********************************************************************

                In-place conversion of ASCII 'A'..'Z' to lowercase. The
                content of 'src' is modified, and 'src' is returned.

        **********************************************************************/

        final static char[] toLower (inout char[] src)
        {
                for (int i = 0; i < src.length; ++i)
                    {
                    char c = src[i];
                    if (c >= 'A' && c <= 'Z')
                        src[i] = c + ('a' - 'A');
                    }
                return src;
        }
}
00646 
00647 
00648 
00649 /*******************************************************************************
00650 
00651         Mutable version of Uri
00652 
00653 *******************************************************************************/
00654 
class MutableUri : Uri
{
        /***********************************************************************

                Create an empty Uri

        ***********************************************************************/

        this ()
        {
                super();
        }

        /***********************************************************************

                Create a Uri by parsing the provided text string.

        ***********************************************************************/

        this (char[] uri)
        {
                super (uri);
        }

        /***********************************************************************

                Construct a Uri from the given components. The query is
                optional. The components are stored as given: they are
                neither parsed nor decoded.

        ***********************************************************************/

        this (char[] scheme, char[] host, char[] path, char[] query = null)
        {
                super();

                this.scheme = scheme;
                this.host   = host;
                this.path   = path;
                this.query  = query;
        }

        /***********************************************************************

                Clone another Uri. This can be used to make a MutableUri
                from an immutable Uri.

                The component arrays are shared with 'uri' rather than
                copied, so the clone refers to the same underlying text.

        ***********************************************************************/

        static MutableUri clone (Uri uri)
        {
                MutableUri copy = new MutableUri (uri.scheme, uri.host,
                                                  uri.path, uri.query);
                copy.userinfo = uri.userinfo;
                copy.fragment = uri.fragment;
                copy.port     = uri.port;
                return copy;
        }

        /***********************************************************************

                Clear everything to null: all components are dropped, the
                port becomes InvalidPort, and the decode buffer is reset.

        ***********************************************************************/

        void reset()
        {
                decoded.reset();
                port = InvalidPort;
                host = path = query = scheme = userinfo = fragment = null;
        }

        /***********************************************************************

                Parse the given uri string, replacing the current content
                of this instance.

                The previous state is cleared first: the parser assigns
                only the components that are present in 'uri', so without
                the reset a query, fragment, port or userinfo from an
                earlier parse would survive into the new Uri. Do not pass
                in text previously obtained from this instance's getters,
                since the reset releases the buffer that may hold it.

        ***********************************************************************/

        MutableUri parse (char[] uri)
        {
                reset();
                super.parse (uri);
                return this;
        }

        /***********************************************************************

                Set the Uri scheme

        ***********************************************************************/

        MutableUri setScheme (char[] scheme)
        {
                this.scheme = scheme;
                return this;
        }

        /***********************************************************************

                Set the Uri host

        ***********************************************************************/

        MutableUri setHost (char[] host)
        {
                this.host = host;
                return this;
        }

        /***********************************************************************

                Set the Uri port. Use InvalidPort to indicate "no port".

        ***********************************************************************/

        MutableUri setPort (int port)
        {
                this.port = port;
                return this;
        }

        /***********************************************************************

                Set the Uri userinfo

        ***********************************************************************/

        MutableUri setUserInfo(char[] userinfo)
        {
                this.userinfo = userinfo;
                return this;
        }

        /***********************************************************************

                Set the Uri query

        ***********************************************************************/

        MutableUri setQuery (char[] query)
        {
                this.query = query;
                return this;
        }

        /***********************************************************************

                Extend the Uri query: 'tail' is appended to an existing
                query with a '&' separator, or becomes the query if there
                is none. An empty 'tail' changes nothing. Returns the
                resultant query (not this instance).

        ***********************************************************************/

        char[] extendQuery (char[] tail)
        {
                if (tail.length)
                   {
                   if (query.length)
                       query = query ~ "&" ~ tail;
                   else
                      query = tail;
                   }
                return query;
        }

        /***********************************************************************

                Set the Uri path

        ***********************************************************************/

        MutableUri setPath (char[] path)
        {
                this.path = path;
                return this;
        }

        /***********************************************************************

                Set the Uri fragment

        ***********************************************************************/

        MutableUri setFragment (char[] fragment)
        {
                this.fragment = fragment;
                return this;
        }
}
00840 
00841 

Generated on Sat Dec 24 17:28:34 2005 for Mango by  doxygen 1.4.0