Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

Uri.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file Uri.d
00004         
00005         Copyright (C) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026 
00027                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00028 
00029         
00030         @version        Initial version, April 2004      
00031         @author         Kris
00032 
00033 
00034 *******************************************************************************/
00035 
00036 module mango.io.Uri;
00037 
00038 private import  std.ctype;
00039 
00040 private import  mango.utils.Text,
00041                 mango.utils.HeapSlice;
00042 
00043 private import  mango.io.Buffer,
00044                 mango.io.Exception;
00045 
00046 private import  mango.io.model.IWriter;
00047 
00048 /*******************************************************************************
00049         C runtime memchr; Uri.decode uses it to test for a '%' before decoding
00050 *******************************************************************************/
00051 
00052 extern (C) char* memchr (char *, char, uint);
00053 
00054 /*******************************************************************************
00055 
00056         Implements an RFC 2396 compliant URI specification. See 
00057         <A HREF="http://ftp.ics.uci.edu/pub/ietf/uri/rfc2396.txt">this page</A>
00058         for more information. 
00059 
00060         The implementation fails the spec on two counts: it doesn't insist
00061         on a scheme being present in the Uri, and it doesn't implement the
00062         "Relative References" support noted in section 5.2. Note that IRI
00063         support can be added by assuming each of userinfo, path, query, and 
00064         fragment are UTF-8 encoded 
00065         (see <A HREF="http://www.w3.org/2001/Talks/0912-IUC-IRI/paper.html">
00066         this page</A> for further details).
00067 
00068         Use the MutableUri derivative where you need to alter specific uri
00069         attributes. 
00070 
00071 *******************************************************************************/
00072 
class Uri : IWritable
{
        /***********************************************************************
        
                Initialize the static state shared by all Uri instances:
                the scheme -> default-port table and the character map 
                used for parsing (Exc* flags) and encoding (Inc* flags).

        ***********************************************************************/

        static this ()
        {
                // allocated up front; nothing visible in this class throws it
                error = new IOException ("Invalid URI specification");

                // Map known generic schemes to their default port. InvalidPort
                // is specified for those schemes that don't use ports.
                foreach (SchemePort sp; schemePorts)
                         genericSchemes[sp.name] = sp.port;
                genericSchemes.rehash;

                // alphanumerics are valid, unescaped, in every component
                for (int i='a'; i <= 'z'; ++i)  
                     map[i] = IncGeneric;

                for (int i='A'; i <= 'Z'; ++i)  
                     map[i] = IncGeneric;

                for (int i='0'; i<='9'; ++i)  
                     map[i] = IncGeneric;

                // these terminate the respective element while parsing
                map[':'] |= ExcScheme;
                map['/'] |= ExcScheme | ExcAuthority;
                map['?'] |= ExcScheme | ExcAuthority | ExcPath;
                map['#'] |= ExcScheme | ExcAuthority | ExcPath | ExcQuery;

                // RFC 2396 "unreserved" marks: legal as-is within userinfo,
                // path and query. IncPath must be included here, otherwise
                // write() would emit "/index.html" as "/index%2ehtml"
                foreach (char c; "-_.!~*'()")
                         map[c] |= IncUser | IncPath | IncQuery;

                // additional scheme symbols
                foreach (char c; "+-.")
                         map[c] |= IncScheme;

                // additional userinfo symbols
                foreach (char c; ";:&=+$,")
                         map[c] |= IncUser;

                // additional path symbols
                foreach (char c; "/;:@&=+$,")
                         map[c] |= IncPath;

                // additional query symbols (also applied to the fragment)
                foreach (char c; ";/?:@&=+$,")
                         map[c] |= IncQuery;
        }
        
        /***********************************************************************
        
                Construct a Uri by parsing the provided character string

        ***********************************************************************/

        this (char[] uri)
        {
                this();
                parse (uri);
        }

        /***********************************************************************
        
                Return the default port for the given scheme. InvalidPort
                is returned if the scheme is unknown, or does not accept
                a port.

        ***********************************************************************/

        final static int getDefaultPort (char[] scheme)
        {
                // Test for membership before indexing: indexing the table
                // with an unknown scheme is not a pure lookup (depending on
                // the compiler it adds an entry to the shared table, or it
                // raises a range error)
                if (scheme in genericSchemes)
                    return genericSchemes [scheme];
                return InvalidPort;
        }

        /***********************************************************************
        
                Return the parsed scheme, or null if the scheme was not
                specified

        ***********************************************************************/

        char[] getScheme()
        {
                return scheme;
        }

        /***********************************************************************
        
                Return the parsed host, or null if the host was not
                specified

        ***********************************************************************/

        char[] getHost()
        {
                return host;
        }

        /***********************************************************************
        
                Return the parsed port number, or InvalidPort if the port
                was not provided.

        ***********************************************************************/

        int getPort()
        {
                return port;
        }

        /***********************************************************************
        
                Return the explicit port where one was specified; otherwise
                the default port of the scheme (which is itself InvalidPort
                for unknown or port-less schemes).

        ***********************************************************************/

        int getValidPort()
        {
                if (port == InvalidPort)
                    return getDefaultPort (scheme);
                return port;
        }

        /***********************************************************************
        
                Return the parsed userinfo, or null if userinfo was not 
                provided.

        ***********************************************************************/

        char[] getUserInfo()
        {
                return userinfo;
        }

        /***********************************************************************
        
                Return the parsed path, or null if the path was not 
                provided.

        ***********************************************************************/

        char[] getPath()
        {
                return path;
        }

        /***********************************************************************
        
                Return the parsed query, or null if a query was not 
                provided.

        ***********************************************************************/

        char[] getQuery()
        {
                return query;
        }

        /***********************************************************************
        
                Return the parsed fragment, or null if a fragment was not 
                provided.

        ***********************************************************************/

        char[] getFragment()
        {
                return fragment;
        }

        /***********************************************************************
        
                Return whether or not the Uri scheme is considered generic;
                that is, whether it is listed in the table of known schemes
                (including those listed with InvalidPort).

        ***********************************************************************/

        bool isGeneric ()
        {
                // membership test only; see getDefaultPort() for why the 
                // table is not indexed directly
                if (scheme in genericSchemes)
                    return true;
                return false;
        }

        /***********************************************************************
        
                Write the content of this Uri to the provided buffer. The
                output is constructed per RFC 2396: userinfo, path, query 
                and fragment are %hex encoded where necessary, whereas the 
                scheme and host are emitted verbatim. The port is omitted 
                when it equals the default port of the scheme.

                Returns the provided buffer.

        ***********************************************************************/

        IBuffer write (IBuffer buf)
        {
                if (scheme.length)
                    buf.append (scheme).append(":");

                // an authority section exists if any of its parts were set
                if (userinfo.length || host.length || port != InvalidPort)
                   {
                   buf.append ("//");

                   if (userinfo.length)
                       encode (buf, userinfo, IncUser).append("@");

                   if (host.length)
                       buf.append (host);

                   if (port != InvalidPort && port != getDefaultPort(scheme))
                      {
                      // wide enough for any 32-bit value, since the port
                      // may have been parsed from (or set to) something 
                      // beyond the five digits of a legitimate port
                      char[10] tmp;
                      buf.append(":").append(Text.itoa (tmp, port));
                      }
                   }

                if (path.length)
                    encode (buf, path, IncPath);

                if (query.length)
                   {
                   buf.append ("?");
                   encode (buf, query, IncQuery);
                   }

                // the fragment shares the query character set
                if (fragment.length)
                   {
                   buf.append ("#");
                   encode (buf, fragment, IncQuery);
                   }

                return buf;
        }

        /***********************************************************************
        
                Write the content of this Uri to the buffer of the provided 
                writer. The output is constructed per RFC 2396

        ***********************************************************************/

        void write (IWriter writer)
        {
                write (writer.getBuffer);
        }

        /***********************************************************************
        
                Convert this Uri to a character string

        ***********************************************************************/

        override char[] toString ()
        {
                return write (new GrowableBuffer(1024)).toString;
        }

        /***********************************************************************
        
                Decode a character string with potential %hex values in it.

                Where the string contains no '%' it is returned as is. 
                Otherwise the decoded text is placed into the expanding 
                buffer owned by this instance, and a slice of that is 
                returned to the requestor.

                A '%' that is not followed by at least two characters is 
                copied through unchanged.

        ***********************************************************************/

        char[] decode (char[] s)
        {
                int length = s.length;

                // take a peek first, to see if there's work to do
                if (length && memchr (s, '%', length))
                   {
                   char* p;
                   int   j;
                        
                   // decoding never lengthens the text, so the input
                   // length is sufficient decoding space
                   p = cast(char*) decoded.expand (length);

                   // scan string, stripping % encodings as we go. Here i
                   // indexes the input whilst j counts the output
                   for (int i; i < length; ++i, ++j, ++p)
                       {
                       char c = s[i];

                       // both hex digits must be present within the input.
                       // NOTE(review): the digits are not validated here;
                       // the outcome for non-hex text depends on Text.atoi
                       if (c == '%' && (i+2) < length)
                          {
                          c = Text.atoi (s[i+1..i+3], 16);
                          i += 2;
                          }
                       *p = c;
                       }
                   // return a slice from the decoded input
                   return cast(char[]) decoded.slice (j);
                   }

                // return original content
                return s;
        }   

        /***********************************************************************
        
                Encode uri characters into an output buffer, such that
                each character not flagged with one of the provided Inc* 
                flags is converted into its %hex version. 

                Returns the provided buffer.

        ***********************************************************************/

        private static IBuffer encode (IBuffer buf, char[] s, int flags)
        {
                char[3] hex;
                int     mark;

                hex[0] = '%';
                foreach (int i, char c; s)
                        {
                        if (! (map[c] & flags))
                           {
                           // flush the run of acceptable characters that
                           // precedes this one
                           buf.put (&s[mark], i - mark);
                           mark = i+1;
                                
                           hex[1] = hexDigits [(c >> 4) & 0x0f];
                           hex[2] = hexDigits [c & 0x0f];
                           buf.append (hex);
                           }
                        }

                // add trailing section
                if (mark < s.length)
                    buf.put (&s[mark], s.length - mark);

                return buf;
        }

        /***********************************************************************
        
                Create an empty Uri, with no port and a fresh decoding 
                buffer. This should not be exposed outside of this module!

        ***********************************************************************/

        private this ()
        {
                port = InvalidPort;
                decoded = new HeapSlice (256);
        }

        /***********************************************************************
        
                Parsing is performed according to RFC 2396
                
                @code
                  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
                   12            3  4          5       6  7        8 9
                    
                2 isolates scheme
                4 isolates authority
                5 isolates path
                7 isolates query
                9 isolates fragment
                @endcode

                This was originally a state-machine; it turned out to be a 
                lot faster (~40%) when unwound like this instead.

                Only those components present within the text are assigned 
                (the path is always assigned, though it may be empty), so 
                attributes set previously are otherwise left alone.
                
        ***********************************************************************/

        private void parse (char[] uri)
        {
                char    c;
                int     i, 
                        mark, 
                        len = uri.length;

                // isolate scheme (note that it's OK to not specify a scheme).
                // The scan halts upon the first of ":/?#", and a scheme is
                // present only where that character is the colon
                for (i=0; i < len && !(map[c = uri[i]] & ExcScheme); ++i) {}
                if (c == ':')
                   {
                   scheme = uri[mark..i];

                   // NOTE(review): the result is discarded, so this 
                   // presumably lower-cases the provided text in place
                   Text.tolower (scheme);
                   mark = i + 1;
                   }

                // isolate authority
                if (mark < len-1  &&  uri[mark] == '/'  &&  uri[mark+1] == '/')
                   {
                   for (mark+=2, i=mark; i < len && !(map[uri[i]] & ExcAuthority); ++i) {}
                   parseAuthority (uri[mark..i]); 
                   mark = i;
                   }

                // isolate path
                for (i=mark; i < len && !(map[uri[i]] & ExcPath); ++i) {}
                path = decode (uri[mark..i]);
                mark = i;

                // isolate query
                if (mark < len && uri[mark] == '?')
                   {
                   for (++mark, i=mark; i < len && uri[i] != '#'; ++i) {}
                   query = decode (uri[mark..i]);
                   mark = i;
                   }

                // isolate fragment
                if (mark < len && uri[mark] == '#')
                    fragment = decode (uri[mark+1..len]);
        }
        
        /***********************************************************************
        
                Authority is the section after the scheme, but before the 
                path, query or fragment; it typically represents a host.
               
                @code
                    ^(([^@]*)@?)([^:]*)?(:(.*))?
                     12         3       4 5
                  
                2 isolates userinfo
                3 isolates host
                5 isolates port
                @endcode

                An empty port (as in "host:") is permitted by RFC 2396, 
                and leaves the port unspecified.

        ***********************************************************************/

        private void parseAuthority (char[] auth)
        {
                int     mark,
                        len = auth.length;

                // get userinfo: (([^@]*)@?)
                foreach (int i, char c; auth)
                         if (c == '@')
                            {
                            userinfo = decode (auth[0..i]);
                            mark = i + 1;
                            break;
                            }

                // get port: (:(.*))?
                for (int i=mark; i < len; ++i)
                     if (auth [i] == ':')
                        {
                        // convert only where something follows the colon
                        if (i+1 < len)
                            port = Text.atoi (auth [i+1..len]);

                        // the host ends where the port begins
                        len = i;
                        break;
                        }

                // get host: ([^:]*)?
                host = auth [mark..len];
        }


        /***********************************************************************
        
                Class attributes

        ***********************************************************************/

        // indicates "no port", both per instance and per scheme
        const int               InvalidPort = -1;

        private int             port;
        private char[]          host,
                                path,
                                query,
                                scheme,
                                userinfo,
                                fragment;

        // backing store for the output of decode()
        private HeapSlice       decoded;

        // per-character combination of the Exc* and Inc* flags below
        private static ubyte    map[256];

        // default port per known scheme; populated from schemePorts
        private static short[char[]] genericSchemes;

        private static const char[] hexDigits = "0123456789abcdef";

        private static IOException error;

        // Exc* flags denote a character terminating that element whilst
        // parsing; Inc* flags denote a character that may be emitted
        // unescaped within that element
        private enum    {
                        ExcScheme       = 0x01, 
                        ExcAuthority    = 0x02, 
                        ExcPath         = 0x04, 
                        ExcQuery        = 0x08, 
                        IncUser         = 0x10, 
                        IncPath         = 0x20,
                        IncQuery        = 0x40,
                        IncScheme       = 0x80,
                        IncGeneric      = IncScheme | IncUser | IncPath | IncQuery
                        };

        private struct SchemePort
        {
                        char[]  name;
                        int     port;
        }

        private static  const SchemePort[] schemePorts =
                        [
                        {"coffee",      80},
                        {"file",        InvalidPort},
                        {"ftp",         21},
                        {"gopher",      70},
                        {"hnews",       80},
                        {"http",        80},
                        {"http-ng",     80},
                        {"https",       443},
                        {"imap",        143},
                        {"irc",         194}, 
                        {"ldap",        389},
                        {"news",        119},
                        {"nfs",         2049}, 
                        {"nntp",        119},
                        {"pop",         110}, 
                        {"prospero",    1525},
                        {"rwhois",      4321},
                        {"sip",         InvalidPort},
                        {"sips",        InvalidPort},
                        {"sipt",        InvalidPort},
                        {"sipu",        InvalidPort},
                        {"shttp",       80},
                        {"smtp",        25},
                        {"snews",       563},
                        {"telnet",      23},
                        {"vemmi",       575},
                        {"videotex",    516},
                        {"wais",        210},
                        {"whois",       43},
                        {"whois++",     43},
                        ];
}
00626 
00627 
00628 /*******************************************************************************
00629 
00630         Mutable version of Uri
00631 
00632 *******************************************************************************/
00633 
class MutableUri : Uri
{
        /***********************************************************************
        
                Create an empty Uri

        ***********************************************************************/

        this ()
        {
                super();
        }

        /***********************************************************************
        
                Create a Uri from the provided text string.

        ***********************************************************************/

        this (char[] uri)
        {
                super (uri);
        }

        /***********************************************************************
        
                Construct a Uri from the given components. The query is
                optional. The components are retained as provided; they
                are not parsed or decoded.
                
        ***********************************************************************/

        this (char[] scheme, char[] host, char[] path, char[] query = null)
        {
                super();

                this.scheme = scheme;
                this.query = query;
                this.host = host;
                this.path = path;
        }

        /***********************************************************************
        
                Clone another Uri. This can be used to make a MutableUri
                from an immutable Uri.

                The component text is not copied: the clone references 
                the same character arrays as the original.

        ***********************************************************************/

        static MutableUri clone (Uri uri)
        {
                // within the with-block, unqualified names are those of
                // the Uri being cloned
                with (uri)
                     {
                     MutableUri ret = new MutableUri (scheme, host, path, query);
                     ret.userinfo = userinfo;
                     ret.fragment = fragment;
                     ret.port = port;
                     return ret;
                     }
        }

        /***********************************************************************
        
                Clear everything to null, set the port to InvalidPort, and 
                reset the decoding buffer.

        ***********************************************************************/

        void reset()
        {
                decoded.reset();
                port = InvalidPort;
                host = path = query = scheme = userinfo = fragment = null;
        }

        /***********************************************************************
        
                Parse the given uri string, replacing the current content
                of this Uri. 

                The instance is reset beforehand, since the underlying 
                parser assigns only those components present in the text; 
                without the reset a scheme, host, port, userinfo, query 
                or fragment from some prior parse or setter would remain
                in place.

                NOTE(review): reset() also resets the decoding buffer, so
                decoded slices obtained from this instance earlier should
                presumably not be retained across a parse -- confirm 
                against HeapSlice.

                Returns this instance, to support call chaining.

        ***********************************************************************/

        MutableUri parse (char[] uri)
        {       
                reset();
                super.parse (uri);
                return this;
        }

        /***********************************************************************
                
                Set the Uri scheme

        ***********************************************************************/

        MutableUri setScheme (char[] scheme)
        {
                this.scheme = scheme;
                return this;
        }

        /***********************************************************************
        
                Set the Uri host

        ***********************************************************************/

        MutableUri setHost (char[] host)
        {
                this.host = host;
                return this;
        }

        /***********************************************************************
        
                Set the Uri port

        ***********************************************************************/

        MutableUri setPort (int port)
        {
                this.port = port;
                return this;
        }

        /***********************************************************************
        
                Set the Uri userinfo

        ***********************************************************************/

        MutableUri setUserInfo(char[] userinfo)
        {
                this.userinfo = userinfo;
                return this;
        }

        /***********************************************************************
        
                Set the Uri query

        ***********************************************************************/

        MutableUri setQuery (char[] query)
        {
                this.query = query;
                return this;
        }

        /***********************************************************************
        
                Set the Uri path

        ***********************************************************************/

        MutableUri setPath (char[] path)
        {
                this.path = path;
                return this;
        }

        /***********************************************************************
        
                Set the Uri fragment

        ***********************************************************************/

        MutableUri setFragment (char[] fragment)
        {
                this.fragment = fragment;
                return this;
        }
}
00803 
00804 
00805 

Generated on Sun Nov 7 19:06:54 2004 for Mango by doxygen 1.3.6