We recently had an issue where one of the PHP servers would hit a CPU bottleneck because a web crawler was sitting on a KeepAlive connection and churning through several thousand pages. We discovered this by looking at the Apache children using the most CPU and noticing that they were almost universally serving Yahoo or Google crawlers. Below is a Perl subroutine that I whipped up for another monitoring script; it pulls CPU and PID info from the output of a
'top -bn1' invocation and then renices any non-root Apache processes using more than 20% of a CPU.
# renice_apache([\%opts])
#
# Takes one batch-mode snapshot from top(1) and adjusts the nice value of
# non-root Apache processes:
#   * nice < 5 and more than 20% CPU (whole-percent part)  -> renice to 10
#   * nice > 5 and less than 1% CPU                        -> renice to 3
#
# Called with no arguments it behaves as it always has.  An optional hash
# reference may override:
#   command   => process name to match in top's COMMAND column
#                (default 'apache2'; compared case-insensitively)
#   top_lines => array ref of lines to parse instead of running /usr/bin/top
#   dry_run   => when true, run no renice command and return, in list
#                context, (\@pids_to_renice, \@pids_to_unnice)
#
# Outside dry-run mode the return value carries no meaning; callers should
# not rely on it.
sub renice_apache
{
    # Options are honoured only when passed as a single hash reference, so a
    # legacy call such as `&renice_apache;` (which forwards the caller's @_)
    # is never misread as an option list.
    my $opts = (@_ == 1 && ref($_[0]) eq 'HASH') ? $_[0] : {};

    my $command = defined $opts->{command} ? $opts->{command} : 'apache2';
    my @top     = $opts->{top_lines}
                ? @{ $opts->{top_lines} }
                : `/usr/bin/top -bn1`;

    my ($renice, $unnice) = _select_apache_pids(\@top, $command);

    return ($renice, $unnice) if $opts->{dry_run};

    # List-form system() runs renice directly, without going through a shell.
    system('/usr/bin/renice', '10', @{$renice}) if @{$renice};
    system('/usr/bin/renice', '3',  @{$unnice}) if @{$unnice};
}

# Parses top(1) output lines and returns (\@renice, \@unnice): the PIDs of
# matching processes that should be deprioritised and re-prioritised.
sub _select_apache_pids
{
    my ($lines, $command) = @_;

    # Every column the selection logic reads.  The header row is accepted
    # only if it names all of them; otherwise a missing column would yield an
    # undef index and silently read field 0 (the PID) instead.
    my @required = ('pid', 'user', 'ni', '%cpu', 'command');

    my %col;        # lower-cased column title -> field index
    my @renice;
    my @unnice;

    LINE: foreach my $raw (@{$lines})
    {
        my $text = $raw;            # copy: leave the caller's lines untouched
        $text =~ s/^\s+|\s+$//g;
        my @field = split(/\s+/, $text);

        # Until the header row is found, treat every line as a candidate.
        if (!%col)
        {
            my %seen;
            @seen{ map { lc($_) } @field } = (0 .. $#field);
            %col = %seen unless grep { !defined $seen{$_} } @required;
            next LINE;
        }

        my ($pid, $user, $ni, $cpu, $name) = @field[ @col{@required} ];

        # Skip short or malformed rows rather than comparing undef values.
        next LINE if grep { !defined($_) } ($pid, $user, $ni, $cpu, $name);
        next LINE unless $pid =~ /^\d+$/;
        next LINE unless $ni  =~ /^-?\d+$/;

        next LINE if lc($user) eq 'root';           # never touch the parent
        next LINE unless lc($name) eq lc($command);

        # Only the whole-percent part of %CPU is compared.
        next LINE unless $cpu =~ /^(\d+)/;
        my $cpu_whole = $1;

        # More than 20% CPU on one process that is not yet deprioritised.
        push(@renice, $pid) if ($ni < 5 && $cpu_whole > 20);
        # Raise priority again on children that are no longer under load.
        push(@unnice, $pid) if ($ni > 5 && $cpu_whole < 1);
    }

    return (\@renice, \@unnice);
}