#!/usr/bin/perl # Greytrapping daemon for OpenBSD spamd # Copyright (c) 2006 Bob Beck . All rights reserved. # # Permission to use, copy, modify, and distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. # Process spamdb output and look for patterns. Mainly we look # at the greylist entries, and make some decisions about them. if they # look excessively spammish, we take some action against them, by # running spamdb -t -a to add them as a TRAPPED entry to spamd, meaning # spamd in greylist mode will tarpit them for the next 24 hours. use Net::DNS; use Email::Valid; use Sys::Syslog qw(:DEFAULT setlogsock); ###################################################################### # How often to scan spamdb - in seconds. # Don't make this too quick! the point of it is to see MULTIPLE # greylist attempts. This should be run more frequently than the greylist # pass time, enough so you can get two scans in, but not much more. # a good suggestion is every 10 minutes, which allows for two runs # every 30 minutes even if DNS lookups take a long time. $SCAN_INTERVAL = 600; # How many sockets will we use for DNS lookups in parallel. A good suggestion # which works for me at a busy site is 50. Don't crank it too high or # you'll hit maxfiles, etc. Setting this to 0 will *disable* the dns MX # and A checking. $DNS_SOCK_MAX=50; # Perfom count checks on hosts with more than this many tuples $SUSPECT_TUPLES = 5; # Count Checks - how many unique sender domains allowed (max) from one host. # If any host in the greylist has more than SUSPECT_TUPLES, and is sending # from more than MAX_DOMAINS, they get trapped. $MAX_DOMAINS = 3; # Count Checks - Max unique sender/tuple ratio. # If any host in the greylist has more than SUSPECT_TUPLES, we count # the number of unique senders in the tuple. if ths number of unique # senders divided by the number of tuples is greater than MAX_SENDERS_RATIO # we trap the host. $MAX_SENDERS_RATIO = 0.75; # List of regexen which, on a case insensitive match to the RCPT, greytrap # the host. Just like spamd greytraps, but these can be regexed. If any # recipient matches any of these regexs, we trap the host. Old domains # or mailservers make great additions to this. @BADRERCPT = ( "\@oldmailserver.mydomain.com\$", "\@unusedomain.org\$", ); # External address checker. if this file exists and is executable, # we will run it for every recipient, giving the recipient address # as an argument on the command line. If the program then exits with # a non-zero exit status, we trap the host sending to this address. # for example, if you maintain an ldap directory for all your users # write a quick script to validate a mail address, and you've got # real power here by trapping any greylisted host that mails to # a bogus address. $EXTERNAL_ADDRESS_CHECKER = "/etc/mail/greytrap_checkrcpt"; ###################################################################### sub daemonize { chdir '/' or die "can't chdir to /"; open STDIN, '/dev/null' or die "can't open /dev/null"; open STDOUT, '>>/dev/null' or die "can't open /dev/null"; open STDERR, '>>/dev/null' or die "can't open /dev/null"; defined (my $pid = fork) or die "Can't fork: $!"; exit if ($pid); setsid or die "can't setsid: $!"; # umask 0; } # Trap a host. Adds them as TRAPPED to spamdb, meaning they will be # tarpitted by spamd for 24 hours from the time we run this. sub trap { my $ip = shift; my $reason = shift; if (!$TOAST{$ip}) { $TOAST{$ip}=1; system "spamdb -t -a $ip\n"; syslog ('info', "Trapped $ip: $reason"); } } # This routine tells us if a single destination rcpt is bogus sub badrcpt { my $rcpt = shift; # 1) check against the BADRERCPT... foreach $re (@BADRERCPT) { if ($rcpt =~ /$re/i) { # match. trap the host. return(1); } } if (-x $EXTERNAL_ADDRESS_CHECKER) { if (system(("$EXTERNAL_ADDRESS_CHECKER", "$rcpt")) != 0) { # address checker says $re is bad - trap the host return(1); } } #rcpt is ok return 0 return(0); } sub scan { setlogsock('unix'); openlog("greytrapper", 'pid', 'mail') || die "can't openlog"; my %WHITE; my %GREY; my %TRAPPED; my %FROM; my %RCPT; my %SENDERS; %TOAST = undef; open (SPAMDB, "spamdb|") || die "can't invoke spamdb!"; while () { # read add to associative arrays.. chomp; if (/^WHITE\|/) { #Remember the whitelisted entries. @line=split('\|'); $WHITE{"$line[1]"} = $_; } elsif (/^TRAPPED\|/) { #Remember any TRAPPED entries. #Remember the whitelisted entries. @line=split('\|'); $TRAPPED{"$line[1]"} = $_; } elsif (/^GREY\|/) { # process a greylist entry @line=split('\|'); if ($GREY{"$line[1]"}) { $GREY{"$line[1]"} .= "\t$_"; # strip off enclosing <> if present $line[2] =~ s/^$//; $line[3] =~ s/^$//; $FROM{"$line[1]"} .= "\t$line[3]"; $RCPT{"$line[1]"} .= "\t$line[4]"; } else { $GREY{"$line[1]"} = "$_"; $line[2] =~ s/^$//; $line[3] =~ s/^$//; $FROM{"$line[1]"} = "$line[3]"; $RCPT{"$line[1]"} = "$line[4]"; } } } close (SPAMDB); my $wi = keys %WHITE; my $tr = keys %TRAPPED; my $gr = keys %GREY; syslog ('debug', "scanned $wi whitelisted, $tr trapped, $gr unique greys\n"); foreach $grey (keys %GREY) { my $trapped = 0; # ignore if it's already done next if ($TRAPPED{$grey} || $WHITE{$grey}); # check the senders. if any are malformed, give the host the boot. my @senders = split("\t", $FROM{$grey}); foreach $s (@senders) { unless(Email::Valid->address(-address =>"$s", -fudge => 1, -local_rules => 1)) { &trap($grey, "Invalid source address $s ($Email::Valid::Details)"); $trapped = 1; } last if $trapped; } next if $trapped; my $count = @senders; my @rcpts = split("\t", $RCPT{$grey}); # if the host has queued up more than our suspect threshold, look # at a few things... if ($count > $SUSPECT_TUPLES) { my $reason = ""; my %R = undef; my %S = undef; my %D = undef; # check how many unique senders and recipients, and domains. foreach $r(@rcpts) { $R{"$r"}++; } foreach $s(@senders) { $S{"$s"}++; $s =~ s/[^\@]+\@//; $D{"$s"}++; } my $rcount = keys %R; my $scount = keys %S; my $dcount = keys %D; if ($dcount > $MAX_DOMAINS) { $reason = "Host sending from " . $dcount . " domains (> $MAX_DOMAINS)"; } elsif ($scount/$count > $MAX_SENDERS_RATIO) { $reason = "Senders/Tuples ration is $scount/$count" . " senders/tuples (> $MAX_SENDERS_RATIO)"; } else { # We could do checks on number of recipients, however, we # must be careful here. a mailing list server mails from a # small number of senders to a potentially large number of # recipients. While this could also be a spammer using a # small number of source addresses that's not been typical # observed behaviour (at least in 2006) # XXX wait and see here.. we may or may not need to do # more stuff here. } if ($reason) { &trap($grey, $reason); $trapped = 1; } } next if $trapped; # now check destination addresses... foreach $r (@rcpts) { if (&badrcpt($r)) { &trap($grey, "Mailed to trap address $r"); $trapped = 1; next; } last if $trapped; } next if $trapped; next if (! $DNS_SOCK_MAX); # skip rest if not using DNS checks; # finally, we will check for an MX or A record of the source address. # first we build a hash of all the senders, keyed by host part # of the address, so we only look each host part up once, no matter # how many hosts are sending mail with it as the sender. my %done = undef; foreach $s (@senders) { # extract the host part. my $h = ($s =~ /^.*@(.*)$/ ? $1 : $s); $h =~ s/\s_+//g; if (! $done{"$h"}) { if ($SENDERS{$h}) { $SENDERS{$h} .= "\t $grey"; } else { $SENDERS{$h} = "$grey"; } } $done{$h}++; } %done = undef; } if (! $DNS_SOCK_MAX) { exit(0); } # DNS sucks moose rocks. So we have to do a bazillion queries in # parallel to get any kind of speed. Sigh... Whip through the list of # addresses being sent, and validate them by checking for an A or # MX record. We don't use Email::Validate because it can't do background # queries. instead we use Net::DNS directly and call select.. my $timeout = 5; my $sel = IO::Select->new; my $res = Net::DNS::Resolver->new; my @domains = (keys %SENDERS); while (scalar @domains > 0) { my @active = $sel->handles; while ($#active < $DNS_SOCK_MAX - 1) { # queue up a query for this domain. my $d = pop(@domains); $sel->add($res->bgsend($d, "A")); $sel->add($res->bgsend($d, "MX")); @active = $sel->handles; } my @ready = $sel->can_read($timeout); if (@ready) { foreach my $sock (@ready) { my $packet = $res->bgread($sock); if ($packet->header->ancount) { my @q = $packet->question; if ($q[0]->qtype eq "A" || $q[0]->qtype eq "MX") { my $d = $q[0]->qname; delete $SENDERS{$d}; } } # Check for the other sockets. $sel->remove($sock); $sock = undef; } } else { # nothing for now. } } @domains = undef; my $timedout = 0; my @ready; while($timedout < 4 && $sel->handles && (@ready = $sel->can_read($timeout))) { if (@ready) { foreach my $sock (@ready) { my $packet = $res->bgread($sock); if ($packet->header->ancount) { my @q = $packet->question; if ($q[0]->qtype eq "A" || $q[0]->qtype eq "MX") { my $d = $q[0]->qname; delete $SENDERS{$d}; } } # Check for the other sockets. $sel->remove($sock); $sock = undef; } } else { $timedout++; } } # now whatever is left in $SENDERS is evil - we removed everything # we could find a mailer for. We go through the evil addresses # and trap anyone sending from one... my @Evil = keys %SENDERS; foreach $evil (@Evil) { my @deaders = split("\t", $SENDERS{$evil}); foreach $dead (@deaders) { &trap($dead, "Mailed from sender $evil with no MX or A"); } @deaders=undef; } } # daemonize and scan in a loop. &daemonize; while (1) { setlogsock('unix'); openlog("greytrapper", 'pid', 'mail') || die "can't openlog"; syslog('debug', "Scan started"); my $pid; $pid = fork(); if (!$pid) { # child. scan away... &scan; exit(0); } # parent waits and sleeps. wait; syslog('debug', "Scan completed"); sleep($SCAN_INTERVAL); }