WebCopy 0.98b7 local patch for webcopy.src.

What this patch does:

 1. split_url() now returns six values instead of five.  The new third value,
    $porta, is ":<port>" when the URL names an explicit port and "" otherwise.
    Every caller shown here is updated to the six-value list.
 2. $porta is declared local() inside split_url().  Without that, every call
    to split_url() (including the ones that parse redirect targets and remote
    links) would overwrite the global $porta that is used to build $in, the
    Host: header and the Referer: header for the start host.
 3. Requests carry a Referer: header built from $host, $porta and $path, and
    the Host: header includes $porta.
 4. A "#..." suffix is stripped from a reference (and split off by
    split_url()) only when it directly follows .htm, .html or .shtml.
 5. get_date() gains a commented-out lstat()-based variant; it is inert.

Notes for whoever applies this:

 a. The added Mozilla $agent assignment is immediately followed by the
    existing $agent="WebCopy/$version" line, so as written it has no effect.
    Move it below that line if the browser User-Agent is actually wanted.
 b. Leading indentation and blank context lines were reconstructed from a
    copy of this patch whose whitespace had been collapsed.  Hunk line counts
    are verified, but apply with "patch -l" so whitespace differences in
    context lines are ignored.

--- webcopy.src.orig	Wed Aug 22 03:38:22 2001
+++ webcopy.src	Wed Aug 22 04:05:37 2001
@@ -36,6 +36,7 @@
 # WebCopy info
 #
 $version="0.98b7";
+$agent="Mozilla/4.0 (compatible; MSIE 4.01; Windows 98)";
 $agent="WebCopy/$version";
 
 # Setup perl defaults
@@ -180,7 +181,7 @@
     $keepit{$_}=1;
 }
 
-unless (($host,$port,$path,$file,$extra)=&split_url($url)) {
+unless (($host,$port,$porta,$path,$file,$extra)=&split_url($url)) {
     die "$0: Malformed http URL\n" unless $path;
 }
 
@@ -228,7 +229,7 @@
     $out=$file;
     $out=~s!^$basepath!/!o unless $complete;
     $out.=$index if $out=~m!/$!;
-    $in="http://$host:$port$file";
+    $in="http://$host"."$porta"."$file";
     $depth=$cache{$file};
 
     if ($query) {
@@ -244,7 +245,7 @@
     ($error,$errormsg,$html)=&transfer($file,$out);
     &log("$error $errormsg\n");
     if ($error==302) {
-        local($h,$n,$p,$f,$e)=&split_url($html);
+        local($h,$n,$n_ascii,$p,$f,$e)=&split_url($html);
         &add_url( ($h eq $host && $n eq $port)?"$p$f$e":$html, 1, "");
     } elsif (($error==200) || ($error==304)) {
         $parse=$html || $file=~m!\.html?$!i || $file=~m!/$!;
@@ -357,7 +358,7 @@
         return;
     }
     if ($full && $ref=~/^\w+:/) {
-        local($h,$n,$p,$f,$e)=&split_url($ref);
+        local($h,$n,$n_ascii,$p,$f,$e)=&split_url($ref);
         if ($h ne "localhost" && ($h ne $host || $n ne $port)) {
             &log(" reject-remote\n");
             return;
@@ -380,7 +381,9 @@
         }
     }
     undef $allowfirst;
-    $ref=~s/\#.*$//; # Remove NAME references
+    if (($ref=~m!\.htm\#!) || ($ref=~m!\.html\#!) || ($ref=~m!\.shtml\#!)) {
+        $ref=~s/\#.*$//; # Remove NAME references
+    }
     unless ($cgi) {
         if ($ref=~/\?/) {
             &log(" reject-cgi\n");
@@ -440,11 +443,15 @@
 sub split_url {
     local($url)=@_;
-    local($host,$port,$path,$file,$extra,$v);
-    ($url,$extra)=$url=~m!^([^\?\#]*)(.*)$!;
+    local($host,$port,$porta,$path,$file,$extra,$v);
+    if (($url=~m!\.htm\#!) || ($url=~m!\.html\#!) || ($url=~m!\.shtml\#!)) {
+        ($url,$extra)=$url=~m!^([^\?\#]*)(.*)$!;
+    } else {
+        ($url,$extra)=$url=~m!^([^\?]*)(.*)$!;
+    }
     $v=$url=~m!^http:(//([^:/]*)(:(\d*))?)?((/([^/]+/)*)?([^/]*))?$!;
     return () unless $v;
-    ($host,$port,$path,$file)=(($2?$2:"localhost"),($4?$4:80),$6,$8);
-    ($host,$port,$path,$file,$extra);
+    ($host,$port,$porta,$path,$file)=(($2?$2:"localhost"),($4?$4:80),($4?sprintf(":%d",$4):""),$6,$8);
+    ($host,$port,$porta,$path,$file,$extra);
 }
 
 sub init_sock {
@@ -481,7 +488,8 @@
     $_=select(S); $|=1; select($_);
     if ($verbose>3) {
         print(($post?"POST":"GET") . " $proxy$url HTTP/1.0${rn}" .
-            "User-Agent: $agent${rn}Accept: */*${rn}Host: ${host}${rn}$update");
+            "Referer: http://"."$host$porta$path${rn}" .
+            "User-Agent: $agent${rn}Accept: */*${rn}Host: ${host}${porta}${rn}$update");
         print "Authorization: $userpass${rn}" if $userpass;
         if ($post) {
             print "Content-Length: " . length($post_data) .
@@ -492,7 +500,8 @@
         }
     }
     print S ($post?"POST":"GET") . " $proxy$url HTTP/1.0${rn}" .
-        "User-Agent: $agent${rn}Accept: */*${rn}Host: ${host}${rn}$update";
+        "Referer: http://"."$host$porta$path${rn}" .
+        "User-Agent: $agent${rn}Accept: */*${rn}Host: ${host}${porta}${rn}$update";
     print S "Authorization: $userpass${rn}" if $userpass;
     if ($post) {
         print S "Content-Length: " . length($post_data) .
@@ -618,6 +627,10 @@
 sub get_date {
     local($file)=@_;
     return(undef) unless (-e ($_=&real($file)));
+# if ( -l $_ )
+# { if ((lstat($_))[5] != 0) { return(undef); };
+# };
+# local($ss,$mm,$hh,$dd,$nn,$yy,$ww)=(gmtime((lstat($_))[9]))[0..6];
     local($ss,$mm,$hh,$dd,$nn,$yy,$ww)=(gmtime((stat($_))[9]))[0..6];
     return sprintf("%s, %02g %s %04g %02g:%02g:%02g GMT",
         $weekday[$ww], $dd, $month[$nn],