Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

Uri.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Uri.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032         
00033         @version        Initial version, April 2004      
00034         @author         Kris
00035 
00036 
00037 *******************************************************************************/
00038 
00039 module mango.io.Uri;
00040 
00041 private import  std.ctype;
00042 
00043 private import  mango.utils.Text,
00044                 mango.utils.HeapSlice;
00045 
00046 private import  mango.io.Buffer,
00047                 mango.io.Exception;
00048 
00049 private import  mango.io.model.IWriter;
00050 
00051 /*******************************************************************************
00052 
00053 *******************************************************************************/
00054 
00055 extern (C) char* memchr (char *, char, uint);
00056 
00057 /*******************************************************************************
00058 
00059         Implements an RFC 2396 compliant URI specification. See 
00060         <A HREF="http://ftp.ics.uci.edu/pub/ietf/uri/rfc2396.txt">this page</A>
00061         for more information. 
00062 
00063         The implementation fails the spec on two counts: it doesn't insist
00064         on a scheme being present in the Uri, and it doesn't implement the
00065         "Relative References" support noted in section 5.2. Note that IRI
00066         support can be added by assuming each of userinfo, path, query, and 
00067         fragment are UTF-8 encoded 
00068         (see <A HREF="http://www.w3.org/2001/Talks/0912-IUC-IRI/paper.html">
00069         this page</A> for further details).
00070 
00071         Use the MutableUri derivative where you need to alter specific uri
00072         attributes. 
00073 
00074 *******************************************************************************/
00075 
00076 class Uri : IWritable
00077 {
00078         /***********************************************************************
00079         
00080                 Build the static scheme-to-port table and the character-class map
00081 
00082         ***********************************************************************/
00083 
00084         static this ()
00085         {
00086                 error = new IOException ("Invalid URI specification");
00087 
00088                 // Map known generic schemes to their default port. Specify
00089                 // InvalidPort for those schemes that don't use ports. Note
00090                 // that a port value of zero is not supported ...
00091                 foreach (SchemePort sp; schemePorts)
00092                          genericSchemes[sp.name] = sp.port;
00093                 genericSchemes.rehash;
00094 
00095                 // load the character map with valid symbols: letters and
                      // digits may appear unescaped in every component
00096                 for (int i='a'; i <= 'z'; ++i)  
00097                      map[i] = IncGeneric;
00098 
00099                 for (int i='A'; i <= 'Z'; ++i)  
00100                      map[i] = IncGeneric;
00101 
00102                 for (int i='0'; i<='9'; ++i)  
00103                      map[i] = IncGeneric;
00104 
00105                 // exclude these from parsing elements: each delimiter ends
                      // the components named by its Exc* bits
00106                 map[':'] |= ExcScheme;
00107                 map['/'] |= ExcScheme | ExcAuthority;
00108                 map['?'] |= ExcScheme | ExcAuthority | ExcPath;
00109                 map['#'] |= ExcScheme | ExcAuthority | ExcPath | ExcQuery;
00110 
00111                 // include these as common symbols. These are the RFC 2396
                      // "mark" characters, which are unreserved and therefore
                      // legal in userinfo, path and query alike. They must carry
                      // IncPath too, otherwise encode() escapes them in paths
                      // (e.g. "/index.html" would be written as "/index%2ehtml")
00112                 map['-'] |= IncUser | IncPath | IncQuery;
00113                 map['_'] |= IncUser | IncPath | IncQuery;
00114                 map['.'] |= IncUser | IncPath | IncQuery;
00115                 map['!'] |= IncUser | IncPath | IncQuery;
00116                 map['~'] |= IncUser | IncPath | IncQuery;
00117                 map['*'] |= IncUser | IncPath | IncQuery;
00118                 map['\''] |= IncUser | IncPath | IncQuery;
00119                 map['('] |= IncUser | IncPath | IncQuery;
00120                 map[')'] |= IncUser | IncPath | IncQuery;
00121 
00122                 // include these as scheme symbols
00123                 map['+'] |= IncScheme;
00124                 map['-'] |= IncScheme;
00125                 map['.'] |= IncScheme;
00126 
00127                 // include these as userinfo symbols
00128                 map[';'] |= IncUser;
00129                 map[':'] |= IncUser;
00130                 map['&'] |= IncUser;
00131                 map['='] |= IncUser;
00132                 map['+'] |= IncUser;
00133                 map['$'] |= IncUser;
00134                 map[','] |= IncUser;
00135 
00136                 // include these as path symbols
00137                 map['/'] |= IncPath;
00138                 map[';'] |= IncPath;
00139                 map[':'] |= IncPath;
00140                 map['@'] |= IncPath;
00141                 map['&'] |= IncPath;
00142                 map['='] |= IncPath;
00143                 map['+'] |= IncPath;
00144                 map['$'] |= IncPath;
00145                 map[','] |= IncPath;
00146 
00147                 // include these as query symbols (write() also uses the
                      // query set when encoding the fragment)
00148                 map[';'] |= IncQuery;
00149                 map['/'] |= IncQuery;
00150                 map['?'] |= IncQuery;
00151                 map[':'] |= IncQuery;
00152                 map['@'] |= IncQuery;
00153                 map['&'] |= IncQuery;
00154                 map['='] |= IncQuery;
00155                 map['+'] |= IncQuery;
00156                 map['$'] |= IncQuery;
00157                 map[','] |= IncQuery;
00158         }
00159         
00160         /***********************************************************************
00161         
00162                 Construct a Uri from the provided character string
00163 
00164         ***********************************************************************/
00165 
00166         this (char[] uri)
00167         {
                      // run the private default constructor first: it sets port to
                      // InvalidPort and allocates the buffer that decode() writes
                      // into while parse() is running
00168                 this();
00169                 parse (uri);
00170         }
00171 
00172         /***********************************************************************
00173         
00174                 Return the default port for the given scheme. InvalidPort
00175                 is returned if the scheme is unknown, or does not accept
00176                 a port.
00177 
00178         ***********************************************************************/
00179 
00180         final static int getDefaultPort (char[] scheme)
00181         {
                      // Test for membership before indexing. An rvalue lookup of
                      // a key that is not present either adds an empty entry to
                      // the shared static table or is a run-time error, depending
                      // on the compiler release; neither is wanted for a scheme
                      // we simply do not know about.
                      if (! (scheme in genericSchemes))
                            return InvalidPort;

00182                 int port = genericSchemes [scheme]; 
                      // a stored value of zero is treated as "no port"
00183                 if (! port)
00184                       port = InvalidPort;
00185                 return port;
00186         }
00187 
00188         /***********************************************************************
00189         
00190                 Return the parsed scheme, or null if the scheme was not
00191                 specified
00192 
00193         ***********************************************************************/
00194 
00195         char[] getScheme()
00196         {
                      // when set by parse() this is a slice of the input text
                      // (everything before the first ':'); no copy is made here
00197                 return scheme;
00198         }
00199 
00200         /***********************************************************************
00201         
00202                 Return the parsed host, or null if the host was not
00203                 specified
00204 
00205         ***********************************************************************/
00206 
00207         char[] getHost()
00208         {
                      // parseAuthority() stores a slice of the authority text
                      // here; unlike userinfo, it is not passed through decode()
00209                 return host;
00210         }
00211 
00212         /***********************************************************************
00213         
00214                 Return the parsed port number, or InvalidPort if the port
00215                 was not provided.
00216 
00217         ***********************************************************************/
00218 
00219         int getPort()
00220         {
                      // stays at InvalidPort (set by the private constructor)
                      // unless parseAuthority() found a ':' in the authority
                      // or a subclass assigned a value
00221                 return port;
00222         }
00223 
00224         /***********************************************************************
00225         
00226                 Return a valid port number by performing a lookup on the 
00227                 known schemes if the port was not explicitly specified.
00228 
00229         ***********************************************************************/
00230 
00231         int getValidPort()
00232         {
                      // an explicit port wins; otherwise fall back to the default
                      // registered for the scheme (which may itself be InvalidPort)
00233                 return (port != InvalidPort) ? port : getDefaultPort (scheme);
00236         }
00237 
00238         /***********************************************************************
00239         
00240                 Return the parsed userinfo, or null if userinfo was not 
00241                 provided.
00242 
00243         ***********************************************************************/
00244 
00245         char[] getUserInfo()
00246         {
                      // parseAuthority() sets this from the text before the first
                      // '@' in the authority, passed through decode()
00247                 return userinfo;
00248         }
00249 
00250         /***********************************************************************
00251         
00252                 Return the parsed path, or null if the path was not 
00253                 provided.
00254 
00255         ***********************************************************************/
00256 
00257         char[] getPath()
00258         {
                      // parse() always assigns the path (possibly as an empty
                      // slice), passed through decode()
00259                 return path;
00260         }
00261 
00262         /***********************************************************************
00263         
00264                 Return the parsed query, or null if a query was not 
00265                 provided.
00266 
00267         ***********************************************************************/
00268 
00269         char[] getQuery()
00270         {
                      // parse() sets this to the text between '?' and '#'
                      // (neither delimiter included), passed through decode()
00271                 return query;
00272         }
00273 
00274         /***********************************************************************
00275         
00276                 Return the parsed fragment, or null if a fragment was not 
00277                 provided.
00278 
00279         ***********************************************************************/
00280 
00281         char[] getFragment()
00282         {
                      // parse() sets this to everything after the '#', passed
                      // through decode()
00283                 return fragment;
00284         }
00285 
00286         /***********************************************************************
00287         
00288                 Return whether or not the Uri scheme is one of the known generic schemes.
00289 
00290         ***********************************************************************/
00291 
00292         bool isGeneric ()
00293         {
                      // Test for membership before indexing: looking up a scheme
                      // that is not in the table either adds an empty entry to
                      // the shared static table or is a run-time error, depending
                      // on the compiler release.
                      if (! (scheme in genericSchemes))
                            return false;

                      // schemes registered with InvalidPort (-1) are non-zero too,
                      // so they also count as generic
00294                 return genericSchemes [scheme] != 0;
00295         }
00296 
00297         /***********************************************************************
00298         
00299                 Write the content of this Uri to the provided buffer. The
00300                 output is constructed per RFC 2396
00301 
00302         ***********************************************************************/
00303 
00304         IBuffer write (IBuffer buf)
00305         {
                      // Appends, in order: scheme ":", "//" authority, path,
                      // "?" query, "#" fragment -- each only when present.
                      // Scheme and host are appended verbatim; userinfo, path,
                      // query and fragment go through encode(). Returns buf.
00306                 if (scheme.length)
00307                     buf.append (scheme).append(":");
00308 
00309 
                      // the "//" prefix is emitted if any authority part is set
00310                 if (userinfo.length || host.length || port != InvalidPort)
00311                    {
00312                    buf.append ("//");
00313 
00314                    if (userinfo.length)
00315                        encode (buf, userinfo, IncUser).append("@");

                   if (host.length)
                       buf.append (host);

                   // a port equal to the scheme's default is left out
                   // NOTE(review): tmp holds 5 characters, enough for ports up
                   // to 65535, but port is an int and nothing visible here
                   // range-checks it -- confirm how Text.itoa treats a value
                   // that needs more digits
                   if (port != InvalidPort && port != getDefaultPort(scheme))
                      {
                      char[5] tmp;
                      buf.append(":").append(Text.itoa (tmp, port));
00316                       }
00317                    }
00318 
00319                 if (path.length)
00320                     encode (buf, path, IncPath);
00321 
00322                 if (query.length)
00323                    {
00324                    buf.append ("?");
00325                    encode (buf, query, IncQuery);
00326                    }
00327 
                      // the fragment is encoded with the query character set;
                      // there is no separate fragment flag
00328                 if (fragment.length)
00329                    {
00330                    buf.append ("#");
00331                    encode (buf, fragment, IncQuery);
00332                    }
00333 
00334                 return buf;
00335         }
00336 
00337         /***********************************************************************
00338         
00339                 Write the content of this Uri to the provided writer. The
00340                 output is constructed per RFC 2396
00341 
00342         ***********************************************************************/
00343 
00344         void write (IWriter writer)
00345         {
                      // delegate to the IBuffer overload, using the buffer the
                      // writer exposes through getBuffer
00346                 write (writer.getBuffer);
00347         }
00348 
00349         /***********************************************************************
00350         
00351                 Convert this Uri to a character string
00352 
00353         ***********************************************************************/
00354 
00355         override char[] toString ()
00356         {
                      // serialise into a scratch buffer constructed with 1024
                      // (presumably its initial size -- confirm against
                      // mango.io.Buffer) and return that buffer's text
                      IBuffer scratch = new GrowableBuffer (1024);
00357                 return write (scratch).toString;
00358         }
00359 
00360         /***********************************************************************
00361         
00362                 Decode a character string with potential %hex values in it.
00363                 The decoded strings are placed into a thread-safe expanding
00364                 buffer, and a slice of it is returned to the requestor.
00365 
00366         ***********************************************************************/
00367 
00368         char[] decode (char[] s)
00369         {
00370                 int total = s.length;
00371 
00372                 // nothing to do without at least one '%': hand the
00373                 // caller's slice straight back
00374                 if (! total || ! memchr (s, '%', total))
00375                       return s;
00376 
00377                 // each pass below consumes at least one input character and
00378                 // emits exactly one, so 'total' bytes of room always suffice
00379                 char* dst = cast(char*) decoded.expand (total);
00380                 int   produced = 0;
00381 
00382                 for (int src = 0; src < total; ++src)
00383                     {
00384                     char ch = s[src];
00385 
00386                     // a '%' needs two characters after it to form an
00387                     // escape; a truncated one is copied through as-is
00388                     if (ch == '%' && (src + 2) < total)
00389                        {
00390                        ch = Text.atoi (s[src+1 .. src+3], 16);
00391                        src += 2;
00392                        }
00393                     dst [produced++] = ch;
00394                     }
00395 
00396                 // return a slice covering the decoded output
00397                 return cast(char[]) decoded.slice (produced);
00398         }   
00399 
00400         /***********************************************************************
00401         
00402                 Encode uri characters into an output buffer, such that
00403                 reserved chars are converted into their %hex version.
00404 
00405         ***********************************************************************/
00406 
00407         private static IBuffer encode (IBuffer buf, char[] s, int flags)
00408         {
00409                 char[3] escape;
00410                 int     start;
00411 
00412                 escape[0] = '%';
00413                 for (int pos = 0; pos < s.length; ++pos)
00414                     {
00415                     char ch = s[pos];
00416 
00417                     // characters allowed by 'flags' extend the pending run
00418                     if (map[ch] & flags)
00419                         continue;
00420 
00421                     // flush the run collected so far, then write the
00422                     // two-digit lower-case hex escape for this character
00423                     buf.append (s[start..pos]);
00424                     start = pos + 1;
00425 
00426                     escape[1] = hexDigits [(ch >> 4) & 0x0f];
00427                     escape[2] = hexDigits [ch & 0x0f];
00428                     buf.append (escape);
00429                     }
                      // whatever follows the last escape goes out unchanged
                      if (start < s.length)
                          buf.append (s[start..s.length]);
00430                 return buf;
00431         }
00432 
00433         /***********************************************************************
00434         
00435                 Default constructor, private to this module: use MutableUri to create an empty Uri.
00436 
00437         ***********************************************************************/
00438 
00439         private this ()
00440         {
                      // no port until one is parsed or assigned
00441                 port = InvalidPort;
                      // backing store for the slices returned by decode(); 256
                      // is presumably an initial size -- confirm against
                      // mango.utils.HeapSlice
00442                 decoded = new HeapSlice (256);
00443         }
00444 
00445         /***********************************************************************
00446         
00447                 Parsing is performed according to RFC 2396
00448                 
00449                 @code
00450                   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
00451                    12            3  4          5       6  7        8 9
00452                     
00453                 2 isolates scheme
00454                 4 isolates authority
00455                 5 isolates path
00456                 7 isolates query
00457                 9 isolates fragment
00458                 @endcode
00459 
00460                 This was originally a state-machine; it turned out to be a 
00461                 lot faster (~40%) when unwound like this instead.
00462                 
00463         ***********************************************************************/
00464 
00465         private void parse (char[] uri)
00466         {
                      // Splits uri into scheme, authority, path, query and
                      // fragment in one left-to-right pass. 'mark' is the start
                      // of the component being isolated, 'i' the scan position.
                      // Fields are only assigned for components that are found
                      // (path is always assigned), and nothing is cleared first,
                      // so components missing from uri keep their prior values.
00467                 char    c;
00468                 int     i, 
00469                         mark, 
00470                         len = uri.length;
00471 
00472                 // isolate scheme (note that it's OK to not specify a scheme)
                      // 'c' ends up holding the character that stopped the scan;
                      // it can only be ':' if the scan stopped on a colon
00473                 for (i=0; i < len && !(map[c = uri[i]] & ExcScheme); ++i) {}
00474                 if (c == ':')
00475                    {
00476                    scheme = uri[mark..i];
                         // NOTE(review): the result of Text.tolower is not used,
                         // so it presumably lower-cases in place; as scheme is a
                         // slice of uri, that would alter the caller's text --
                         // confirm against mango.utils.Text
00477                    Text.tolower (scheme);
00478                    mark = i + 1;
00479                    }
00480 
00481                 // isolate authority
                      // only present when the next two characters are "//"
00482                 if (mark < len-1  &&  uri[mark] == '/'  &&  uri[mark+1] == '/')
00483                    {
00484                    for (mark+=2, i=mark; i < len && !(map[uri[i]] & ExcAuthority); ++i) {}
00485                    parseAuthority (uri[mark..i]); 
00486                    mark = i;
00487                    }
00488 
00489                 // isolate path
                      // runs up to the first '?' or '#', or the end of input
00490                 for (i=mark; i < len && !(map[uri[i]] & ExcPath); ++i) {}
00491                 path = decode (uri[mark..i]);
00492                 mark = i;
00493 
00494                 // isolate query
                      // skips the '?' itself and stops at '#' or the end of input
00495                 if (mark < len && uri[mark] == '?')
00496                    {
00497                    for (++mark, i=mark; i < len && uri[i] != '#'; ++i) {}
00498                    query = decode (uri[mark..i]);
00499                    mark = i;
00500                    }
00501 
00502                 // isolate fragment
                      // everything after the '#', to the end of input
00503                 if (mark < len && uri[mark] == '#')
00504                     fragment = decode (uri[mark+1..len]);
00505         }
00506         
00507         /***********************************************************************
00508         
00509                 Authority is the section after the scheme, but before the 
00510                 path, query or fragment; it typically represents a host.
00511                
00512                 @code
00513                     ^(([^@]*)@?)([^:]*)?(:(.*))?
00514                      12         3       4 5
00515                   
00516                 2 isolates userinfo
00517                 3 isolates host
00518                 5 isolates port
00519                 @endcode
00520 
00521         ***********************************************************************/
00522 
00523         private void parseAuthority (char[] auth)
00524         {
                      // Splits the authority text into userinfo, host and port.
                      // 'mark' is where the host starts; 'len' is pulled back to
                      // where the host ends once a port separator is found.
00525                 int     mark,
00526                         len = auth.length;
00527 
00528                 // get userinfo: (([^@]*)@?)
00529                 foreach (int i, char c; auth)
00530                          if (c == '@')
00531                             {
00532                             userinfo = decode (auth[0..i]);
00533                             mark = i + 1;
00534                             break;
00535                             }
00536 
00537                 // get port: (:(.*))?
00538                 for (int i=mark; i < len; ++i)
00539                      if (auth [i] == ':')
00540                         {
                              // RFC 2396 lets the port be empty ("host:"), which
                              // means "use the default". Leave the port untouched
                              // in that case instead of converting an empty
                              // string, which could produce the unsupported
                              // port value of zero.
                              if (i+1 < len)
00541                             port = Text.atoi (auth [i+1..len]);
00542                         len = i;
00543                         break;
00544                         }
00545 
00546                 // get host: ([^:]*)?
00547                 host = auth [mark..len];
00548         }
00549 
00550 
00551         /***********************************************************************
00552         
00553                 Class attributes
00554 
00555         ***********************************************************************/
00556 
              // sentinel meaning "no port specified / scheme has no port"
00557         const int               InvalidPort = -1;
00558 
              // parsed components; the text fields are slices, either of the
              // parsed input or of the 'decoded' buffer
00559         private int             port;
00560         private char[]          host,
00561                                 path,
00562                                 query,
00563                                 scheme,
00564                                 userinfo,
00565                                 fragment;
              // per-instance buffer that decode() writes into
00566         private HeapSlice       decoded;
00567 
              // per-character bitmask of the Exc*/Inc* flags, filled in by the
              // static constructor
00568         private static ubyte    map[256];
00569 
00570                     
              // scheme name => default port, filled from schemePorts by the
              // static constructor (InvalidPort is stored as -1)
00571         private static short[char[]] genericSchemes;
00572 
              // lower-case digits used by encode() for %xx escapes
00573         private static const char[] hexDigits = "0123456789abcdef";
00574 
              // created by the static constructor; not thrown anywhere in
              // this listing
00575         private static IOException error;
00576 
              // Bit flags stored in map[]. Exc* bits mark a character that ends
              // the named component while parsing; Inc* bits mark a character
              // that encode() may write unescaped for the named component.
00577         private enum    {
00578                         ExcScheme       = 0x01, 
00579                         ExcAuthority    = 0x02, 
00580                         ExcPath         = 0x04, 
                              // set for '#', but parse() compares against '#'
                              // directly rather than testing this bit
00581                         ExcQuery        = 0x08, 
00582                         IncUser         = 0x10, 
00583                         IncPath         = 0x20,
                              // also used by write() for the fragment
00584                         IncQuery        = 0x40,
                              // recorded in map[], but encode() is never called
                              // with it in this listing (scheme is written as-is)
00585                         IncScheme       = 0x80,
                              // letters and digits: allowed everywhere
00586                         IncGeneric      = IncScheme | IncUser | IncPath | IncQuery
00587                         };
00588 
              // one row of the schemePorts table: a scheme name paired with
              // its default port (or InvalidPort)
00589         private struct SchemePort
00590         {
00591                         char[]  name;
00592                         int     port;
00593         }
00594 
              // Known generic schemes and their default ports, copied into
              // genericSchemes by the static constructor. Names are lower-case;
              // parse() lower-cases a parsed scheme before it is looked up.
              // InvalidPort marks a scheme listed without a default port.
00595         private static  const SchemePort[] schemePorts =
00596                         [
00597                         {"coffee",      80},
00598                         {"file",        InvalidPort},
00599                         {"ftp",         21},
00600                         {"gopher",      70},
00601                         {"hnews",       80},
00602                         {"http",        80},
00603                         {"http-ng",     80},
00604                         {"https",       443},
00605                         {"imap",        143},
00606                         {"irc",         194}, 
00607                         {"ldap",        389},
00608                         {"news",        119},
00609                         {"nfs",         2049}, 
00610                         {"nntp",        119},
00611                         {"pop",         110}, 
00612                         {"prospero",    1525},
00613                         {"rwhois",      4321},
00614                         {"sip",         InvalidPort},
00615                         {"sips",        InvalidPort},
00616                         {"sipt",        InvalidPort},
00617                         {"sipu",        InvalidPort},
00618                         {"shttp",       80},
00619                         {"smtp",        25},
00620                         {"snews",       563},
00621                         {"telnet",      23},
00622                         {"vemmi",       575},
00623                         {"videotex",    516},
00624                         {"wais",        210},
00625                         {"whois",       43},
00626                         {"whois++",     43},
00627                         ];
00628 }
00629 
00630 
00631 /*******************************************************************************
00632 
00633         Mutable version of Uri
00634 
00635 *******************************************************************************/
00636 
00637 class MutableUri : Uri
00638 {
00639         /***********************************************************************
00640         
00641                 Create an empty Uri
00642 
00643         ***********************************************************************/
00644 
00645         this ()
00646         {
                      // Uri's default constructor is private to the module; this
                      // public constructor is how other code gets an empty Uri
00647                 super();
00648         }
00649 
00650         /***********************************************************************
00651         
00652                 Create a Uri from the provided text string.
00653 
00654         ***********************************************************************/
00655 
00656         this (char[] uri)
00657         {
                      // forwards to Uri(char[]), which initialises the instance
                      // and then parses uri
00658                 super (uri);
00659         }
00660 
00661         /***********************************************************************
00662         
00663                 Construct a Uri from the given components. The query is
00664                 optional.
00665                 
00666         ***********************************************************************/
00667 
00668         this (char[] scheme, char[] host, char[] path, char[] query = null)
00669         {
00670                 super();
00671 
                      // the slices are stored as given: nothing is copied,
                      // decoded or lower-cased; port, userinfo and fragment
                      // keep the defaults from the base constructor
00672                 this.scheme = scheme;
00673                 this.host   = host;
00674                 this.path   = path;
00675                 this.query  = query;
00676         }
00677 
00678         /***********************************************************************
00679         
00680                 Clone another Uri. This can be used to make a MutableUri
00681                 from an immutable Uri.
00682 
00683         ***********************************************************************/
00684 
00685         static MutableUri clone (Uri uri)
00686         {
                      // the copy shares the source's text slices, including any
                      // that point into the source's decode buffer; only the
                      // Uri object itself is new
00687                 MutableUri copy = new MutableUri (uri.scheme, uri.host,
00688                                                   uri.path, uri.query);
00689                 copy.userinfo = uri.userinfo;
00690                 copy.fragment = uri.fragment;
00691                 copy.port     = uri.port;
00692                 return copy;
00695         }
00696 
00697         /***********************************************************************
00698         
00699                 Clear everything to null.
00700 
00701         ***********************************************************************/
00702 
00703         void reset()
00704         {
                      // presumably rewinds the decode buffer so its storage is
                      // reused -- confirm against mango.utils.HeapSlice; slices
                      // handed out earlier should be treated as stale after this
00705                 decoded.reset();
                      // back to the "nothing specified" state
00706                 port = InvalidPort;
00707                 host = path = query = scheme = userinfo = fragment = null;
00708         }
00709 
00710         /***********************************************************************
00711         
00712                 Parse the given uri string
00713 
00714         ***********************************************************************/
00715 
00716         MutableUri parse (char[] uri)
00717         {       
                      // Start from a clean slate. Uri.parse only assigns the
                      // components it finds, so without this a query, fragment,
                      // port, userinfo, host or scheme left over from an earlier
                      // parse would leak into the new result. As with reset(),
                      // slices obtained from this instance earlier should not be
                      // relied upon afterwards.
                      reset();
00718                 super.parse (uri);
                      // return this so calls can be chained
00719                 return this;
00720         }
00721 
00722         /***********************************************************************
00723                 
00724                 Set the Uri scheme
00725 
00726         ***********************************************************************/
00727 
00728         MutableUri setScheme (char[] scheme)
00729         {
                      // stored exactly as given: parse() passes a parsed scheme
                      // through Text.tolower, this setter does not, and the
                      // default-port table is keyed by lower-case names
00730                 this.scheme = scheme;
                      // return this so calls can be chained
00731                 return this;
00732         }
00733 
00734         /***********************************************************************
00735         
00736                 Set the Uri host
00737 
00738         ***********************************************************************/
00739 
00740         MutableUri setHost (char[] host)
00741         {
                      // stored as given (no copy); write() appends the host
                      // verbatim, without %-encoding
00742                 this.host = host;
                      // return this so calls can be chained
00743                 return this;
00744         }
00745 
00746         /***********************************************************************
00747         
00748                 Set the Uri port
00749 
00750         ***********************************************************************/
00751 
00752         MutableUri setPort (int port)
00753         {
                      // no range check is made; InvalidPort marks the port as
                      // unspecified, which is what write() and getValidPort() test
00754                 this.port = port;
                      // return this so calls can be chained
00755                 return this;
00756         }
00757 
00758         /***********************************************************************
00759         
00760                 Set the Uri userinfo
00761 
00762         ***********************************************************************/
00763 
00764         MutableUri setUserInfo(char[] userinfo)
00765         {
                      // stored as given (no copy); write() %-encodes it on
                      // output using the userinfo character set
00766                 this.userinfo = userinfo;
                      // return this so calls can be chained
00767                 return this;
00768         }
00769 
00770         /***********************************************************************
00771         
00772                 Set the Uri query
00773 
00774         ***********************************************************************/
00775 
00776         MutableUri setQuery (char[] query)
00777         {
                      // stored as given (no copy), without the leading '?':
                      // write() adds the '?' and %-encodes the text on output
00778                 this.query = query;
                      // return this so calls can be chained
00779                 return this;
00780         }
00781 
00782         /***********************************************************************
00783         
00784                 Set the Uri path
00785 
00786         ***********************************************************************/
00787 
00788         MutableUri setPath (char[] path)
00789         {
                      // stored as given (no copy); write() %-encodes it on
                      // output using the path character set
00790                 this.path = path;
                      // return this so calls can be chained
00791                 return this;
00792         }
00793 
00794         /***********************************************************************
00795         
00796                 Set the Uri fragment
00797 
00798         ***********************************************************************/
00799 
00800         MutableUri setFragment (char[] fragment)
00801         {
                      // stored as given (no copy), without the leading '#':
                      // write() adds the '#' and %-encodes the text on output
00802                 this.fragment = fragment;
                      // return this so calls can be chained
00803                 return this;
00804         }
00805 }
00806 
00807 
00808 

Generated on Tue Jan 25 21:18:25 2005 for Mango by doxygen 1.3.6