#!/usr/bin/perl

use strict;
use warnings;

# Unbuffered output, so reports appear as soon as a message is finished.
$| = 1;

# Addresses are truncated to this many characters
my $A = 35;

# Messages currently being tracked, keyed by message number.  Each value
# is a message structure (see the POD at the end of this file).
my %msg;

# Maps the number of every delivery seen so far (for a message that has
# not ended yet) to the number of the message it belongs to.
my %delivery;

# The function invoked for every completed message.  Point this at a
# different sub to get a different report.
*report = \&report_default;

while (<>) {
    chomp;

    # Only lines carrying the "qmail: " tag are of interest; drop the tag
    # and everything in front of it.
    next unless s/.*qmail: //;

    # Remove the fractional timestamp and remember it.  $time stays
    # undefined when the line carries no timestamp.
    my $time = s/(\d+\.\d+)\s+// ? $1 : undef;

    if (/^new msg (\d+)/) {
        my $msgn = $1;
        if (exists $msg{$msgn}) {
            warn "Duplicate message number $msgn";
            next;
        }
        $msg{$msgn}{arrived} = $time;
        $msg{$msgn}{msg}     = $msgn;
        $msg{$msgn}{bounced} = '-';
    }
    elsif (/^info msg (\d+): bytes (\d+) from <([^>]*)>/) {
        my ($msgn, $length, $from) = ($1, $2, $3);
        $msg{$msgn}{length} = $length;
        # The bounce sender "<>" is shown as "-".
        $msg{$msgn}{from} = $from || "-";
    }
    elsif (/^starting delivery (\d+): msg (\d+) to (local|remote) (.*)/) {
        my ($delno, $msgn, $lr, $to) = ($1, $2, $3, $4);
        $msg{$msgn}{del}{$delno}{lr}      = $lr;
        $msg{$msgn}{del}{$delno}{to}      = $to;
        $msg{$msgn}{del}{$delno}{started} = $time;
        $delivery{$delno} = $msgn;
    }
    elsif (/^delivery (\d+): (success|deferral|failure): (.*)/) {
        my ($delno, $result, $text) = ($1, $2, $3);
        my $msgn = $delivery{$delno};
        unless (defined $msgn) {
            warn "Delivery $delno unknown";
            next;
        }
        my $del = $msg{$msgn}{del}{$delno} ||= {};
        $del->{result}   = $result;
        $del->{report}   = $text;
        $del->{finished} = $time;
        $del->{elapsed}  = $time - $del->{started};
    }
    elsif (/^end msg (\d+)/) {
        my $msgn = $1;

        # Nothing at all is known about this message (for instance, all
        # of its other log lines precede the start of the input).
        # Reporting it would print an empty record with bogus times.
        unless (exists $msg{$msgn}) {
            warn "Message $msgn unknown";
            next;
        }

        $msg{$msgn}{finished} = $time;
        $msg{$msgn}{lifetime} = $time - $msg{$msgn}{arrived};
        report($msg{$msgn});

        # The message is finished: forget it and its deliveries.
        for my $delno (keys %{ $msg{$msgn}{del} || {} }) {
            delete $delivery{$delno};
        }
        delete $msg{$msgn};
    }
    elsif (/^status:/) {
        next;
    }
    elsif (/^bounce msg (\d+)/) {
        $msg{$1}{bounced} = 'B';
    }
    else {
        # Unrecognized lines are silently ignored.
        # warn "Unmatched line: $_\n";
    }
}

# Default report: print one summary line for the message, followed by one
# line per delivery attempt in the order the attempts were started, and a
# blank line.
#
# Argument: a reference to a message structure.
sub report_default {
    my $msg = shift;

    printf "%6d (%s) %-${A}s %7s %1s\n",
        $msg->{msg},
        fmtime($msg->{arrived}),
        substr($msg->{from}, 0, $A),
        pbytes($msg->{length}),
        $msg->{bounced};

    my $dels = $msg->{del} || {};
    my @by_start =
        sort { $dels->{$a}{started} <=> $dels->{$b}{started} } keys %$dels;

    for my $delno (@by_start) {
        my $del = $dels->{$delno};
        printf " %6d (%s) %1s %-${A}s %1s %6s\n",
            $delno,
            fmtime($del->{started}),
            substr($del->{lr}, 0, 1),         # "l" or "r"
            substr($del->{to}, 0, $A),
            uc(substr($del->{result}, 0, 1)), # "S", "D" or "F"
            ptime($del->{elapsed});
    }
    print "\n";
}

# Given a number of bytes, return a suitable descriptive string
# such as "983 b" or "12 Mb".
sub pbytes {
    my $bytes  = shift;
    my @prefix = ('', 'k', 'M', 'G', 'T');
    my $power  = 0;

    # Divide by 1024 until the value fits the current unit or there are
    # no larger units left.
    until ($bytes < 1024 || $power == $#prefix) {
        $bytes /= 1024;
        $power++;
    }

    # Round to the nearest whole unit.
    my $rounded = int($bytes + .5);

    # Rounding may carry a value such as 1023.7 up to 1024; show that as
    # 1 of the next larger unit rather than as "1024 kb".
    if ($rounded >= 1024 && $power < $#prefix) {
        $rounded = 1;
        $power++;
    }

    return "$rounded $prefix[$power]b";
}

# Given a time in Unix epoch format, return a formatted human-readable
# representation of it, "YYYY/MM/DD HH:MM:SS" in the local time zone.
sub fmtime {
    my $time = shift;
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($time);

    # localtime counts years from 1900 and months from 0.
    return sprintf "%04d/%02d/%02d %2d:%02d:%02d",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec;
}

# Given a time interval in seconds, return a formatted human-readable
# representation of it:
#
#   under a minute   "5.0038s"
#   under an hour    " 5:38m"   (minutes:seconds)
#   under a day      " 5:38h"   (hours:minutes)
#   under 99 days    " 5d03h"   (days, hours)
#   anything longer  "(unk)"
#
# Every field is truncated, never rounded, so that the leading field is
# not overstated and the trailing field never reaches 60 (or 24).
sub ptime {
    my $sec = shift;

    if ($sec < 60) {
        # Six significant characters of the fixed-point representation.
        return substr(sprintf("%6f", $sec), 0, 6) . "s";
    }
    elsif ($sec < 3600) {
        return sprintf "%2d:%02dm", int($sec / 60), $sec % 60;
    }
    elsif ($sec < 86400) {
        return sprintf "%2d:%02dh",
            int($sec / 3600), int(($sec % 3600) / 60);
    }
    elsif ($sec < 99 * 86400) {
        return sprintf "%2dd%02dh",
            int($sec / 86400), int(($sec % 86400) / 3600);
    }
    else {
        return "(unk)";
    }
}

# This example 'report' function will generate
# a table showing all the invalid local addresses to which mail was
# sent, and who sent mail there.
sub report_bad_local_addresses {
    my $msg = shift;

    # Earlier experiment, kept for reference:
    # my @fields = qw(msg length from to lr status);
    # my @format = qw(%6d %6d %-33s %-33s %1s %1s);
    # printf "@format\n", @{$h->{$n}}{@fields};

    # The delivery report text that marks an unknown local mailbox.
    my $no_mailbox = "Sorry,_no_mailbox_here_by_that_name._(#5.1.1)/";
    my $sender     = $msg->{from};

    for my $attempt (values %{$msg->{del}}) {
        my $is_bad_local_address =
               $attempt->{result} eq 'failure'
            && $attempt->{lr}     eq 'local'
            && $attempt->{report} eq $no_mailbox;
        next unless $is_bad_local_address;

        # One row per failed attempt: who sent it, and to which address.
        # (Append "unless $seen{$recip}++" to list each address once.)
        printf "%-38s %-38s\n", $sender, $attempt->{to};
    }
}

# Find interesting and complicated examples to use in the man page:
# print the default report only for messages whose deliveries include
# more than one kind (local/remote) and more than one result.
sub report_interesting_examples {
    my $msg = shift;
    my (%kind_seen, %result_seen);

    for my $attempt (values %{$msg->{del}}) {
        $kind_seen{ $attempt->{lr} }       = 1;
        $result_seen{ $attempt->{result} } = 1;
    }

    return unless keys(%kind_seen) > 1 && keys(%result_seen) > 1;
    report_default($msg);
}

=head1 NAME

digest-maillog - read qmail log files and generate reports

=head1 SYNOPSIS

    digest-maillog [logfiles...]

=head1 DESCRIPTION

C<digest-maillog> reads the log files that were generated by the
C<qmail> mail transfer agent and generates reports from the
information it gathers.  It reads from the files named on the command
line (which, for best results, should be listed in chronological
order) or, if no files are named, from the standard input.  By default
it writes its reports to standard output.

C<digest-maillog> accumulates information about each message that is
queued.  Once the message's ultimate disposition is known, it invokes
the function C<report>, passing it a data structure that represents
the message.  C<report> prints out the information in whatever format
is desired.

=head2 Default Output Format

A typical output of the default C<report> function follows:

    706852 (2002/08/13 14:08:06) mjd-clpm-discard@plover.com            3 kb B
     207409 (2002/08/13 14:08:08) l mjd-clpm-confirm-registration@plove S 4.2383s
     207410 (2002/08/13 14:08:08) r freeoffer1@yahoo.com                D 5.0038s
     207450 (2002/08/13 14:15:28) r freeoffer1@yahoo.com                D 0.8844s
     207485 (2002/08/13 14:36:08) r freeoffer1@yahoo.com                D 0.6788s
     207569 (2002/08/13 15:10:08) r freeoffer1@yahoo.com                D 6.6628s
     207683 (2002/08/13 15:57:28) r freeoffer1@yahoo.com                D 1.1381s
     207925 (2002/08/13 16:58:08) r freeoffer1@yahoo.com                D 1.2200s
     208166 (2002/08/13 18:12:08) r freeoffer1@yahoo.com                F 0.8394s

The first line gives the message number (C<706852>), the date and time
that the message was first queued (August 13, 2002 at 2:08:06 PM), the
sender address (C<mjd-clpm-discard@plover.com>), and the approximate
message size (3 kilobytes).  The C<B> at the end indicates that a
bounce message was generated for this message; if the message had not
bounced, the C<B> would have been a C<-> instead.

The following lines summarize the delivery attempts for this message.
The first line has the delivery number (C<207409>), the time at which
the delivery attempt was initiated (two seconds later), the recipient
address (C<mjd-clpm-confirm-registration@plover.com>), truncated to 35
characters, and annotated with C<l> because the delivery was attempted
locally.  If the delivery is to a remote address, the C<l> is replaced
with C<r>.  The final status of the attempt is C<S>, indicating a
successful delivery.  C<4.2383s> means that the delivery required
4.2383 seconds to complete.

The rest of the delivery lines record the history of the attempted
delivery to C<freeoffer1@yahoo.com>, a remote address, indicated by
C<r> instead of C<l>.  The first six attempts resulted in deferrals
(C<D> instead of C<S>); the seventh attempt, at 6:12:08 PM, failed
(C<F>), resulting in a bounce message.

Message elapsed times are notated in one of the following formats:

    5.0038s   (5.0038 seconds)
    5:38m     (five minutes, 38 seconds)
    5:38h     (five hours, 38 minutes)
    5d03h     (five days, three hours)

Message sizes are notated in one of the following formats:

    383 b     (383 bytes exactly)
    383 kb    (about 383 kilobytes; that is, between 391680 and 392703 bytes)
    383 Mb    (about 383 megabytes)
    383 Gb    (about 383 gigabytes)

=head2 Writing a Custom C<report> function

The C<report> function is passed a reference to a message structure.
The message structure is a hash with the following keys:

=over 4

=item C<arrived>

The time at which the message was first placed in the queue, in Unix
epoch format.

=item C<msg>

The message number of the message.  (Note that message numbers are
I<not> unique across messages.  After a message is removed from the
queue, its message number becomes available for use by new messages.)

=item C<bounced>

C<B> if a bounce message was generated for this message; C<-> if not.

=item C<length>

Length of the message, in bytes.

=item C<from>

The sender's address.

=item C<finished>

The time at which the message was removed from the queue, in Unix
epoch format.

=item C<lifetime>

The total amount of time the message spent in the queue, in seconds.

=item C<del>

A reference to a hash describing the delivery attempts for this
message.  Keys in this hash are delivery numbers, and are unique
across all messages delivered by a single instance of C<qmail>.
Values in the hash are delivery structures, described below.

=back

Each delivery attempt is represented by a hash with the following keys:

=over 4

=item C<lr>

C<local> or C<remote>, according to whether the delivery is attempted
locally or remotely.

=item C<to>

The recipient email address.

=item C<started>

The time the delivery attempt was begun, in Unix epoch format.

=item C<result>

The result of the delivery attempt, either C<success>, C<deferral>, or
C<failure>.  Deferred attempts are normally retried; failed attempts
normally result in a bounce message being generated.

=item C<report>

Any additional information supplied by the receiving system.  This
includes responses from remote SMTP servers.

=item C<finished>

The time the delivery attempt was completed, in Unix epoch format.

=item C<elapsed>

The total time taken to complete the delivery attempt, in seconds.

=back

=head2 Examples

The source code contains three example C<report> functions.

=head2 Future Developments

I will probably turn this script into a Perl module sometime in the
near future.

=head1 AUTHOR

Mark Jason Dominus (mjd-qmail-digest+@plover.com)

=head1 COPYRIGHT

Copyright 2002 Mark Jason Dominus.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

To obtain a copy of the GNU General Public License, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

=cut