urlparse.pm100700 312 310 4657 7057062677 12426 0ustar nahajsorcererpackage urlparse; =head1 NAME urls - perl module to deal with parsing and building URL's in accordance with RFC2396. =head1 SYNOPSIS use urlparse qw(&parseuri &builduri); ($scheme, $authority, $path, $query, $fragment) = &parseuri ($given); $url = &builduri ($scheme, $authority, $path, $query, $fragment); =head1 DESCRIPTION There are a lot of "gotcha's" in dealing with parsing and building URL's. There is enough confusion, and missimplemention, that they finally issued an RFC (RFC 2396) that clairifies the procedures. This code just implements that RFC. =head1 AUTHOR John Halleck =head1 COPYRIGHT (C) Copyright 2000 by John Halleck. =cut require 5.0; # Not a prayer with perl 4 use strict; use vars qw ($VERSION @ISA @EXPORT_OK); @ISA = qw (Exporter); @EXPORT_OK = qw(&parseuri &builduri); $VERSION = 1.0; # ============== Deal with URL's. sub parseuri { # parse a uri in the manner recomended by RFC 2396 # (We shamelessly steal the regular expression from that document # to insure we comply with it.) my $given = shift; die ("No URI given") if !defined $given; my ($scheme, $authority, $path, $query, $fragment) = ($given =~ m;^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$;) [2-1, 4-1, 5-1, 7-1, 9-1] ; # Note shift by one to match perl convention. return ($scheme, $authority, $path, $query, $fragment); } # ------------------ sub builduri { # build a URI from it's components. (RFC 2396 5.2.7) my $scheme = shift; my $authority = shift; my $path = shift; $path = '' if !defined $path; # Really an error, but what to do? my $query = shift; my $fragment = shift; # Code directly transliterated from the RFC. % escaping added to match parsing # restrictions in parseuri. (Which are the official ones) my $result = ''; if (defined $scheme) { $scheme =~ s/:/%3A/g; $scheme =~ s;/;%2F;g; $scheme =~ s/\?/%3F/g; $scheme =~ s/\#/%23/g; $result .= $scheme . ':'; } if (defined $authority) { $authority =~ s:/:%2F:g; $authority =~ s/\?/%3F/g; $authority =~ s/\#/%23/g; $result .= '//' . $authority; } $path =~ s/\?/%3F/g; $path =~ s/\#/%23/g; $result .= $path; if (defined $query) { $query =~ s/\#/%23/g; $result .= '?' . $query; } if (defined $fragment) { $result .= '#' . $fragment; } # print "DEBUG: builduri returning $result\n"; return $result; } 1; # End Package paths.pm100700 312 310 7141 7055607450 11667 0ustar nahajsorcererpackage paths; =head1 NAME paths - perl module to deal with absolute and relative paths in accordance with RFC2396. =head1 SYNOPSIS use paths qw(&makepathrel &makepathabs) $relative = &makepathrel (&givenpath, &basepath); &makepathrel ('/a/b', '/a/b/c/') returns "../" $absolute = &makepathabs (&givenpath, &basepath); &makepathabs ('../', '/a/b/c/') returns "/a/b"; =head1 DESCRIPTION There are a lot of "gotcha's" in dealing with relative and absolute paths. There is enough confusion, and missimplemention, that they finally issued an RFC that clairifies the procedure. This code just implements that RFC. =head1 AUTHOR John Halleck =head1 COPYRIGHT (C) Copyright 2000 by John Halleck. =cut require 5.0; # Not a prayer with perl 4. use strict; use vars qw ($VERSION @ISA @EXPORT_OK); @ISA = qw (Exporter); @EXPORT_OK = qw(&makepathabs &makepathrel); $VERSION = 1.0; # ------------------ sub makepathabs { # this is a direct coding of RFC 2396 5.2.6 my $given = shift; my $base = shift; if (!defined $base) { die "No base path given" } if (!defined $given) { die "No relative path given" } if ($given =~ m;^/;) { return $given } # RFC 2396 5.2.6a my $result = $base; if ($result =~ m;^(.*/)[^/]+$;) { $result = $1 } # RFC 2396 5.2.6b $result .= $given; # Just to make search patterns easier. my $startingslash = $result =~ m:^/: ; if (!$startingslash) { $result = '/' . $result } # RFC 2396 5.2.6c Remove /.'s $result =~ s:/\./:/:g; # RFC 2396 5.2.6d Remove trailing /. $result =~ s:/\.$:/: ; # RFC 2396 5.2.6e Remove //../'s while ($result =~ m;^(.*?)/[^/]*/\.\./(.*)$;) { $result = "$1/$2" } # RFC 2396 5.2.6f Remove trailing //.. if ($result =~ m;^(.*/)([^/]*)/\.\.$; && $2 ne '..') { $result = $1} # RFC 2396 5.2.6g if ($result eq '/..') { $result = '' } # Some implementations MAY... else { while ($result =~ m;^(.*)/\.\.(/.*)$;) { $result = $1 . $2 } # Some implementations MAY... } # Undo the leading / we put on to simplify pattern matches. if (!$startingslash && $result =~ m;^/;) { $result =~ s;^/;; } return $result; } # ----------------- sub makepathrel { my $given = shift; my $base = shift; if (!defined $given) { return '' } if ($given !~ m;^/(.*)$;) { return $given } # not given an abs. $given = $1; if (!defined $base) { return $given } # This is an error, but what to do? if ($base !~ m;^/(.*)$;) { return $given } # really an error. $base = $1; # Were we handed the same thing? if ($base eq $given) { if ($given =~ m:/$:) { return './' } else { return '.' } } # We don't really care about non directory part of the base if they are unequal if ($base =~ m:^(.*/)[^/]+$:) { $base = $1 } # a/b/c/d/ a/b/g/h => c/d/ g/h my ($testbase, $restbase, $testgiven, $restgiven); my $didone = 0; while (1) { # print "DEBUG: Making relative $given, $base\n"; if ($base !~ m:^([^/]*)/(.*)$:) { last } $testbase = $1; $restbase = $2; if ($given !~ m:^([^/]*)/(.*)$:) { last } $testgiven = $1; $restgiven = $2; if ($testbase eq $testgiven) { $base = $restbase; $given = $restgiven; $didone = 1; } else { last } } # What if nothing in common? Then just give it as abs. if (!$didone) { return '/' . $given } # a/b/c d => ../../d while ($base =~ m:^[^/]*/(.*)$:) { $base = $1; $given = '../' . $given; } if ($given eq '' || $given eq '/') { return './' } # print "DEBUG: Making relative returning $given\n"; return $given; } 1; # End package urls.pm100700 312 310 6362 7057063370 11540 0ustar nahajsorcererpackage urls; =head1 NAME urls - perl module to deal with URL's in accordance with RFC2396. =head1 SYNOPSIS use urlpath qw(&makeurlabs &makeurlrel); $absolute = &makeurlabs ('../foo', 'http://www.place/a/b/c') $relative = &makeurlrel ('http://www.place/a/b/d', 'http://www.place/a/b/c') =head1 DESCRIPTION There are a lot of "Gotcha's" dealing with relative and absolute URL's. Some browsers even get it wrong. There is enough confusion, and missimplemention, that they finally issued an RFC (RFC 2396) that clairifies the procedures. This code just implements that RFC. =head1 AUTHOR John Halleck =head1 COPYRIGHT (C) Copyright 2000 by John Halleck. =cut require 5.0; # Not a prayer with perl 4 use strict; use vars qw ($VERSION @ISA @EXPORT_OK); @ISA = qw (Exporter); @EXPORT_OK = qw(&makeurlabs &makeurlrel); $VERSION = 1.0; # in order to deal with relative and absolute URI's, we have to deal with # absolute and relative paths. use paths qw(&makepathabs &makepathrel); use urlparse qw(&parseuri &builduri); # ============== Deal with URL's. sub makeurlabs { # This is modeled directly on the discription in RFC 2396 my $given = shift; my $basescheme = shift; my $baseauthority = shift; my $basepath = shift; my $basequery = shift; my $basefragment = shift; my $result = $given; # Item 5.2.1 from RFC 2396 my ($scheme, $authority, $path, $query, $fragment) = &parseuri ( $given ); # Item 5.2.2 from RFC 2396 if ($path eq '' && !defined $scheme && !defined $authority && !defined $query) { $scheme = $basescheme; $authority = $baseauthority; $path = $basepath; $query = $basequery; # $result = $given; } else { # RFC2396 5.2.3 # ["Some implementations MAY..."] if (defined $scheme && defined $basescheme && $scheme eq $basescheme && $scheme =~ /^(https?|ftp|gopher)$/) { $scheme = undef } if (!defined $scheme) { $scheme = $basescheme; # RFC 2396 5.2.4 if (!defined $authority) { $authority = $baseauthority if defined $baseauthority; # RFC 2396 5.2.5 if ($path !~ m;^/;) { # RFC 2396 5.2.6 $path = &makepathabs ($path, $basepath); } } } } # RFC 2396 5.2.7 return &builduri ($scheme, $authority, $path, $query, $fragment); } # ---------------- sub makeurlrel { my $given = shift; my $basescheme = shift; my $baseauthority = shift; my $basepath = shift; my $basequery = shift; my $basefragment = shift; # print "DEBUG: Making URL rel, $given\n"; my ($scheme, $authority, $path, $query, $fragment) = &parseuri ( $given ); if (defined $scheme && defined $basescheme && $scheme eq $basescheme ) { $scheme = undef; if (defined $authority && defined $baseauthority && $authority eq $baseauthority) { $authority = undef; $path = &makepathrel ($path, $basepath); } } if (defined $basequery && defined $query && $basequery eq $query) { $query = undef } if (!defined $scheme && !defined $authority && defined $path && $path =~ m:^\.\/?$: && (defined $query || defined $fragment)) { $path = '' } return &builduri ($scheme, $authority, $path, $query, $fragment); } 1; # End Package