archie/release/B/scripts/filter_anonftp_unix_bsd
2024-05-28 16:48:29 +02:00

153 lines
4.1 KiB
Perl
Executable File

#!/usr/local/bin/perl
#
# $Id: filter_anonftp_unix_bsd,v 1.1 1995/01/11 08:03:50 lucb Exp $
#
$non_crud = $perm_denied = $not_found = "";
while( <> ){
next_one:
# Stomp on carriage returns
s/\015//g;
# Try and spot crud in the line and avoid it
# You can get:
# -rw-r--r-ls: navn/internett/RCS/nsc,v: Permission denied
# ls: navn/internett/RCS/bih,v: Permission denied
# - 1 43 daemon 1350 Oct 28 14:03 sognhs
# -rwcannot access .stuff/incoming
# cannot access .stuff/.cshrc
# UX:ls * .. this was found in IRIX systems recently. The whole line
# should be ignored (Bibi Oct-5-95)
if( m%^(.*)/bin/ls:.*Permission denied% ||
m%^UX:ls:.*Permission denied% ||
m%^(.*)ls:.*Permission denied% ||
m%^(.*)(cannot|can not) access % ){
if( ! $non_crud ){
$non_crud = $1;
}
next;
}
# Also try and spot non ls "Permission denied" messages. These
# are a LOT harder to handle as the key part is at the end
# of the message. For now just zap any line containing it
# and the first line following (as it will PROBABLY have been broken).
#
if( /.:\s*Permission denied/ ){
$perm_denied = 1;
next;
}
if( /.:\s*No permission/ ) {
$perm_denied = 1;
next;
}
if( $perm_denied ){
$perm_denied = "";
# warn "Warning: input corrupted by \"Permission denied\" errors, about line $. of $lsparse'name\n";
next;
}
# Not found's are like Permission denied's. They can start part
# way through a line but with no way of spotting where they begin
if( /not found/ ){
$not_found = 1;
next;
}
if( $not_found ){
$not_found = "";
# warn "Warning: input corrupted by \"not found\" errors, about line $. of $lsparse'name\n";
next;
}
if( $non_crud ){
$_ = $non_crud . $_;
$non_crud = "";
}
# Bibi added this to take care of the AIX listings where the D/F replace the
# d/- in Sunos listings.
if(/^([DF][-rwxSsTt]{9}).*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)$/ ){
s%^D%d%;
s%^F%-%;
}
# Fixes the year's trailing spaces for AIX listings.
if( /^([-dl][-rwxSsTt]{9}).*\s(\d+)\s*\w\w\w\s+\d+(\s(\d\d\d\d)\s\s)(.*)$/ ){
$subs = $4;
s%$3% $subs %;
}
# Bibi added this to take of filenames tht start with \n!! I ignore it and
# ignore the following lines if they are still part of this filename.
if(/^([-dl][-rwxSsTt]{9}).*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s(\n)$/ ){
while($_ = <>){
if(/^([-dl][-rwxSsTt]{9}).*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)$/ ){
goto next_one;
}else{
if( /:(\s)*$/ ){
print "\n";
goto next_one;
}
next;
}
}
next;
}
if( (/^([-dl][-rwxSsTtl]{9}).*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s(\.|\.\.)$/ ) ||
/^total[ ][ ]*[0-9][0-9]*[ ]*$/ ||
/^ls:(\s*)total[ ][ ]*[0-9][0-9]*[ ]*$/ ||
/^Total:[ ][ ]*[0-9][0-9]*[ ]*kbytes$/ ||
/^opendir:.*$/ ||
/^\.:$/ ||
/^\.$/ ||
(/^.*[ ][ ]*unreadable?[ ]*$/ &&
!(/^([-dl][-rwxSsTtl]{9}).*/) )||
/^[cb][-rwxSsTt]\{9\}/ ||
/^ld.so: warning: / ||
/^crt0: no / ||
# These next 4 lines are to remove the crap at the start of the
# nikhefh.nikhef.nl listing
/This is:/ ||
/NIKHEF-H, National Institute/ ||
/Kruislaan 409, P.O. Box/ ||
/Questions to:/ ||
# Not sure about where this one comes from.
/WE ARE IN EUROPE/ ){
next;
}
s%^\./%%;
s%^/usr/spool/ftp/%%;
s%^/pub/%%;
s%^/usr/local/pub/%%;
s%^/home/ftp/pub/%%;
s%^/ftp/pub/%%;
s%^/com/ftp/pub/%%;
s%^/com/ftp/sun4/pub/%%;
s%^/var/spool/uucppublic/%%;
# -rw-r--r--+ from ghost.dsi.unimi.it (lucb)
s%^([-cbdlrwxSsTt]{10})\+%$1 %;
local( $dir_line );
s%^([-dl][-rwxSsTt]{9})([0-9]+)%$1 $2%;
if( /^([-dl][-rwxSsTt]{9}).*\s(\d+)\s*(\w\w\w\s+\d+\s*(\d+:\d+|\d\d\d\d))\s+(.*)$/ ){
$file_line = 1;
s,/$,,;
}
elsif( /:$/ ){
$dir_line = 1;
}
elsif( /\S/ && ! /\S+\s\S+/ ){
# Probably a start of directory without a trailing :
# (export.lcs.mit.edu outputs these)
s/$/:/;
$dir_line = 1;
}
if( $dir_line && ! $last_was_blank ){
print "\n";
}
print;
$last_was_blank = /^$/;
$last_was_dir = $dir_line;
}