Article 6614 of comp.lang.perl: Xref: feenix.metronet.com comp.lang.perl:6614 Path: feenix.metronet.com!news.ecn.bgu.edu!usenet.ins.cwru.edu!howland.reston.ans.net!agate!ames!purdue!not-for-mail From: spaf@cs.purdue.edu (Gene Spafford) Newsgroups: comp.lang.perl Subject: Re: Perl .newsrc cleanup script? Date: 10 Oct 1993 10:47:30 -0500 Organization: Department of Computer Sciences, Purdue University Lines: 175 Message-ID: <299aqiINNe4o@uther.cs.purdue.edu> References: <2918j2$7r0@snoopy.cis.ufl.edu> NNTP-Posting-Host: uther.cs.purdue.edu In-reply-to: spp@dragonfly.cis.ufl.edu's message of 7 Oct 1993 14:20:18 GMT The following doesn't do exactly what Steve requested, and it is *realy* ugly and it has localisms for our news and my preference for newsgroup order, but....it works. This takes as argument the .newsrc file and the news/lib/active file. It updates all the range lists in newsgroup lines, and sorts them according to a prespecified order (see the pattern list). Then it writes out a new copy with ! lines at the end. Happy hacking. #!/usr/local/perl/perl -s @Patterns = ( 'announce', 'general', '^[^.]+$', '^serc\.', '^rec\.humor\.funny', 'comp\.risks', '^purdue\.', '^in\.', '^ieee\.', '^gnu\.', '^news\.', '^comp\.', '^sci\.', '^misc\.', '^rec\.', '^soc\.', '^alt\.', '^talk\.'); die "usage: $0 newsrc-file [active-file]\n" unless ($Fname = shift); open(NRC, "<$Fname") || die "Cannot open $Fname: $!"; (rename ($Fname, "$Fname.old") || die "Cannot rename $Fname: $!") unless $debug; $, = $\ = "\n"; @slurp = (); close NRC; chop @slurp; warn (scalar(@slurp), " newsgroup lines found in $Fname\n") if $debug; if ($active = shift) { open(ACTIVE, "<$active") || die "Cannot open active file $active: $!"; while () { split; $Low{@_[0]} = @_[2]+0; $High{@_[0]} = @_[1]+0; } close ACTIVE; $index = 0; until ($index > $#slurp) { ($group) = $slurp[$index] =~ m/^(\S+)[:!]/; next if defined $Low{$group}; warn "Deleting bogus group: $group\n"; splice (@slurp, $index, 1) && redo; } continue {++$index;} for ($_ = 0; $_ <= $#slurp; ++$_) { ($group, $sep, $numbs) = $slurp[$_] =~ m/^(\S+)([:!])\s*(\d+.*)$/; next unless $numbs =~ m/\S/; warn "Group $group has read $numbs\n" if $debug; $numbs = &CheckRead($group, $numbs); warn "New range is $numbs\n\n" if $debug; $slurp[$_] = "$group$sep $numbs"; } } foreach $_ (sort {$a cmp $b;} @slurp) { push (@On, $_) if m/:/; push (@Off, $_) unless m/:/; warn "Don't know what to do about: $_\n" unless m/[:!]/; } open(NEW, ">$Fname") || die "Cannot open $Fname: $!" unless $debug; open(NEW, "|cat") if $debug; foreach $pattern (@Patterns) { eval <<"EOF" ; \$index = $[; until (\$index > \$#On) { next unless \$On[\$index] =~ m/$pattern/o; print NEW splice(\@On, \$index, 1, ()); redo; } continue {++\$index;} EOF warn $@ if $@; } print NEW @On, @Off; close NEW; exit; sub CheckRead { local($group, $numbers) = @_; local($low, $high, $result) = ($Low{$group}, $High{$group}, ''); local(@bool, $start, $flag, $next, $first, $index); warn "active file has $low to $high for $group\n" if $debug; # Simplest case -- all articles expired (empty group) return "1-$high" if $high < $low; # Next simplest case -- I'm already caught up $numbers =~ m/^\s+((\d+)-(\d+))$/ && $2 == 1 && $3 == $high && return $1; # We construct a boolean array indicating what has been seen foreach $index (split(/,/, $numbers)) { if ($index =~ m/(\d+)-(\d+)/) { next if $2 < $low; $first = $1 < $low ? $low : $1; foreach $_ ($first .. $2) {$bool[$_] = 1;} } else { $bool[$index]++ unless $index < $low; } } warn "max article read is $#bool\n" if $debug; # If the most recently-read article has expired, the case is simple if ($#bool < $low) { $low--; return "1-$low"; } # If the group has been here awhile, we can reduce the array elsif ($low > 1) { $start = $low-1; @bool = @bool[$start .. $high]; $flag = 1; $bool[0] = 0; } # Now step through for each set of values while (($next = &NextVal(1)) != -1) { $start += $next; $next = &NextVal(0); if ($next < 0) { $result .= ",$start-$high" unless $flag; last; } elsif ($next == 0) { if ($flag) { $result = $result . ",1-" . (($start == $low) ? "$start" : $low-1 . ",$start"); $flag = 0; } else { $result .= ",$start"; } $start += 2; } else { $next += $start; $start = 1 if $flag; $flag = 0; $result .= ",${start}-$next"; $start = $next + 2; } } return "1-$high" unless $result =~ m/\S/; substr($result, 1); # chop the leading , } sub NextVal { local($val) = @_; local($count, $tmp) = 0; local($arrsize) = $#bool; while ($count <= $arrsize) { $tmp = shift @bool; return $count if ($tmp && $val); return $count unless ($tmp || $val); $count++; } -1; } -- Gene Spafford, COAST Project Director Software Engineering Research Center & Dept. of Computer Sciences Purdue University, W. Lafayette IN 47907-1398 Internet: spaf@cs.purdue.edu phone: (317) 494-7825