#!/usr/bin/perl
use warnings;
umask 0;
use Config;
use DBI;
use DBD::mysql;
use LWP::Simple;
use threads;
use strict;
use CGI;
use CGI::Carp qw(warningsToBrowser fatalsToBrowser);
use Socket;
# --- CGI response setup ------------------------------------------------
# Emit the HTTP header first, then switch STDOUT to autoflush so that each
# progress line printed later reaches the client immediately.
my $cgi = CGI->new;
print $cgi->header;
$| = 1;

# This script relies on interpreter threads; stop early if this perl was
# built without them.
die "Recompile Perl with threads to run this program. \n" unless $Config{useithreads};

# --- Shared file-level state -------------------------------------------
my $threadcount     = 50;   # number of slots used in @threads
my $enablethreading = 1;    # 1: fetch pages in worker threads, 0: fetch inline
my @threads;                # slot -> thread object, or a false value when free
my $dbh;                    # database handle assigned by connectDB()
# Open a MySQL connection through DBI and store the handle in the
# file-level $dbh. Dies with the DBI error text when the connection
# cannot be established. Returns the new handle.
sub connectDB {
    my ($database, $hostname) = ("xxxx", "localhost");
    my ($username, $password) = ("xxx", "xxxx");

    my $dsn = "DBI:mysql:$database:$hostname";
    $dbh = DBI->connect($dsn, $username, $password);
    die "Can't connect to the DB: $DBI::errstr\n" unless $dbh;

    return $dbh;
}
# Resolve a host name to a dotted-quad IPv4 string.
# Takes the host name as its only argument; returns the address text, or
# undef when gethostbyname() yields no address.
sub getip {
    my ($name) = @_;
    my $packed = gethostbyname($name);
    return inet_ntoa($packed) if $packed;
    return undef;
}
# Resolve one scraped host entry and record the outcome in the database.
#
# $host is the text captured from the page (a host name, optionally followed
# by a path). The host-name part is extracted and resolved with getip():
#   - when an IP is found, the (ip, host) pair is inserted into listeIp;
#   - otherwise the host is queued in UrltoVerify.
# A progress line tagged with the current thread id is printed either way.
#
# Values are passed to the database as bind parameters instead of being
# interpolated into the SQL text, because $host originates from remote pages.
sub processHost {
    my ($host) = @_;
    return unless defined $host && length $host;

    # First host-name-looking token inside $host (list context + /g returns
    # all matches; only the first one is kept).
    my ($hostonly) = $host =~ m/[a-z0-9][a-z0-9-.]*[.][a-z.]+[a-z]/ig;

    # Without a host name there is nothing to resolve; treat it as "no IP".
    my $ip = defined $hostonly ? getip($hostonly) : undef;

    # connectDB() assigns a fresh handle to the file-level $dbh before each
    # insert, as the original code did.
    connectDB();

    my $tid = threads->self()->tid();

    # NOTE(review): $host is printed into the HTML without escaping.
    if ($ip) {
        print "<b>[ " . $tid . " ]</b> <a href=\"http://$host\">$host</a> - $ip <br/>\n";
        $dbh->do(
            "INSERT IGNORE INTO listeIp (Ipadress, url, Type, InsertDate) VALUES (?, ?, 'WebProxy', NOW())",
            undef, $ip, $host,
        ) or warn "Could not record $host in listeIp: " . $dbh->errstr . "\n";
    } else {
        print "<b>[ " . $tid . " ]</b> <a href=\"http://$host\">$host</a> - NO IP <br/>\n";
        $dbh->do(
            "INSERT IGNORE INTO UrltoVerify (url) VALUES (?)",
            undef, $host,
        ) or warn "Could not record $host in UrltoVerify: " . $dbh->errstr . "\n";
    }
}
# Download $url with curl and pass every host found in the page to
# processHost().
#
# curl is started through a list-form pipe open, so $url is handed to the
# program as a single argument and is never interpreted by a shell.
# curl's progress meter is disabled (--silent) while real errors are still
# reported on STDERR (--show-error); only the page body is scanned.
#
# Returns nothing. If curl cannot be started a warning is emitted and the
# URL is skipped.
sub parseHosts {
    my ($url) = @_;
    return unless defined $url && length $url;

    my @curl = (
        'curl', '--silent', '--show-error',
        '--header',
        'User-agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14',
        $url,
    );

    my $fh;
    unless (open($fh, '-|', @curl)) {
        warn "Cannot run curl for $url: $!\n";
        return;
    }
    my $content = do { local $/; <$fh> };
    close($fh) or warn "curl reported a failure for $url (exit status " . ($? >> 8) . ")\n";
    $content = '' unless defined $content;

    # Host name, optionally followed by a path.
    my $hostpattern = "[a-z0-9][a-z0-9-.]*[.][a-z.]+[a-z]/?[a-z0-9_./-]*";

    # The three markup shapes in which a host is recognised.
    my $p1 = "href=[\"]($hostpattern)[\"] onclick";
    my $p2 = "href=[\"]http://($hostpattern)[\"] onclick";
    my $p3 = "blank[\"]>($hostpattern)</a></td><td>";

    # Each alternative carries exactly one capture group (the host), so at
    # most one of $1/$2/$3 is defined per match.
    while ($content =~ m/(?:$p1)|(?:$p2)|(?:$p3)/igs) {
        my ($host) = grep { defined } ($1, $2, $3);
        processHost($host) if $host;
    }
    return;
}
# Run parseHosts($url) in a new thread.
# The thread object is stored in the first free slot of @threads (slots
# 0 .. $threadcount-1). When every slot is occupied, all running threads
# are joined via closeOpenThreads() and the request is retried.
sub parseHostsThreaded {
    my ($url) = @_;

    my ($free) = grep { !$threads[$_] } 0 .. $threadcount - 1;

    unless (defined $free) {
        # No slot available: wait for the current batch, then try again.
        closeOpenThreads();
        return parseHostsThreaded($url);
    }

    $threads[$free] = threads->new(\&parseHosts, $url);
    return;
}
# Join every thread currently stored in @threads (slots 0 .. $threadcount-1)
# and mark its slot as free by resetting it to 0.
sub closeOpenThreads {
    foreach my $slot (0 .. $threadcount - 1) {
        my $worker = $threads[$slot] or next;
        $worker->join();
        $threads[$slot] = 0;
    }
}
# Main driver: read the source pages registered under category 2 and scan
# each of them for hosts, either in worker threads or inline depending on
# $enablethreading.
connectDB();

my $sth = $dbh->prepare("SELECT * FROM listeUrl WHERE category=2")
    or die "Can't prepare the URL query: " . $dbh->errstr . "\n";
$sth->execute
    or die "Can't run the URL query: " . $sth->errstr . "\n";

while (my @row = $sth->fetchrow_array()) {
    # The second selected column is passed on as the page URL.
    if ($enablethreading) {
        parseHostsThreaded($row[1]);
    } else {
        parseHosts($row[1]);
    }
}

# Wait for any worker threads that are still running.
closeOpenThreads();