mirror of
https://github.com/scr34m/php-malware-scanner.git
synced 2026-06-16 12:30:35 +00:00
Combined whitelist sources updated
This commit is contained in:
@@ -1,5 +1,35 @@
|
||||
<?php
|
||||
// Default cache location: a "cache" directory next to this script.
$cache_dir = __DIR__ . '/cache';
|
||||
/**
 * Perform a GET request with cURL, following redirects.
 *
 * @param string       $url  Address to request.
 * @param string|false $file Optional destination path. When given, the response
 *                           body is streamed into that file instead of being returned.
 *
 * @return string|null The trimmed response body when no $file is given (an empty
 *                     string if the transfer failed); nothing when writing to a file.
 */
function fetch($url, $file = false)
{
    // Open the destination first so an unwritable path is reported clearly
    // instead of handing a non-resource to CURLOPT_FILE.
    $fp = null;
    if ($file) {
        $fp = fopen($file, 'w');
        if ($fp === false) {
            die('Unable to open ' . $file . ' for writing' . PHP_EOL);
        }
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_VERBOSE, false);

    if ($fp) {
        curl_setopt($ch, CURLOPT_FILE, $fp);
    } else {
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    }

    $headers = array(
        // A browser-like User-Agent is sent with every request; the original
        // author's note ("drupal suxx") suggests drupal.org requires it.
        'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15',
    );
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

    $result = curl_exec($ch);
    if ($result === false) {
        // Report the failure; callers still receive the same values as before
        // (empty string / nothing) and verify downloads by checksum.
        echo 'Request failed: ' . $url . ' (' . curl_error($ch) . ')' . PHP_EOL;
    }
    curl_close($ch);

    if ($fp) {
        fclose($fp);
        return;
    }

    return is_string($result) ? trim($result) : '';
}
|
||||
|
||||
function is_cached($file)
|
||||
{
|
||||
@@ -23,37 +53,33 @@ function get_cache($file)
|
||||
return file_get_contents($cache_dir . '/' . $file);
|
||||
}
|
||||
|
||||
/**
 * Append the MD5 hash of every file inside a cached archive to the whitelist.
 *
 * The per-archive hash list is stored next to the archive in the cache
 * directory as "<file>.hash" and reused on later runs.
 *
 * @param resource $fp   Open, writable handle of the combined whitelist file.
 * @param string   $file Archive file name inside the global $cache_dir.
 */
function hash_archive($fp, $file)
{
    global $cache_dir;

    $hash_file = $cache_dir . '/' . $file . '.hash';
    if (!is_file($hash_file)) {
        // The archive is walked through the phar:// stream wrapper.
        $entries = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator('phar://' . $cache_dir . '/' . $file),
            RecursiveIteratorIterator::CHILD_FIRST
        );

        // Collect everything in memory and write the hash file in one go, so
        // a failure while reading the archive cannot leave behind an empty or
        // partial .hash file that later runs would treat as complete.
        $hashes = '';
        foreach ($entries as $entry) {
            if ($entry->isFile()) {
                // store md5 hash we use that in the scanner
                $hashes .= md5(file_get_contents($entry->getPathname())) . "\n";
            }
        }

        if (file_put_contents($hash_file, $hashes) === false) {
            die('Unable to write ' . $hash_file . PHP_EOL);
        }
    }

    fputs($fp, file_get_contents($hash_file));
}
|
||||
|
||||
function fetch_jquery($fp)
|
||||
{
|
||||
echo 'Fetching jQuery' . PHP_EOL;
|
||||
$data = file_get_contents('https://code.jquery.com/jquery/');
|
||||
$data = fetch('https://code.jquery.com/jquery/');
|
||||
|
||||
preg_match_all(
|
||||
'/<a class=\'open\-sri\-modal\' href=\'\/(jquery-.*?\.js)\' data\-hash=\'sha256\-(.*?)\'/',
|
||||
@@ -63,7 +89,7 @@ function fetch_jquery($fp)
|
||||
foreach ($m[1] as $k => $file) {
|
||||
if (!is_cached($file)) {
|
||||
echo 'Downloading: ' . 'https://code.jquery.com/' . $file . PHP_EOL;
|
||||
$data = file_get_contents('https://code.jquery.com/' . $file);
|
||||
$data = fetch('https://code.jquery.com/' . $file);
|
||||
if (base64_encode(hash('sha256', $data, true)) != $m[2][$k]) {
|
||||
die('Hash mismatch' . PHP_EOL);
|
||||
}
|
||||
@@ -72,10 +98,164 @@ function fetch_jquery($fp)
|
||||
$data = get_cache($file);
|
||||
}
|
||||
|
||||
// store md5 hash we use that in the scanner
|
||||
fputs($fp, md5($data) . "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Download an archive into the cache after verifying its checksum.
 *
 * Nothing happens when the archive is already cached. On a checksum mismatch
 * the script terminates.
 *
 * @param string      $file     Name under which the archive is cached.
 * @param string      $url      Download address of the archive.
 * @param string|null $hash     Expected checksum (hex); ignored when $hash_url is given.
 * @param string      $algo     Hash algorithm name understood by hash_file(), e.g. 'sha1' or 'md5'.
 * @param string|null $hash_url Optional address from which the expected checksum is downloaded.
 */
function fetch_archive($file, $url, $hash, $algo, $hash_url = null)
{
    if (is_cached($file)) {
        return;
    }

    // Temporary download target inside the script directory (the path
    // separator was previously missing, placing the file beside the directory).
    $tmp = __DIR__ . '/dl.tar.gz';

    echo 'Downloading: ' . $url . PHP_EOL;
    fetch($url, $tmp);

    if (!empty($hash_url)) {
        echo 'Downloading hash: ' . $hash_url . PHP_EOL;
        $hash = fetch($hash_url);
    }

    $data_hash = hash_file($algo, $tmp);

    // Compare as strings: a loose != treats numeric-looking digests such as
    // "0e1234..." as numbers and may accept a wrong file. Hex digests are
    // case-insensitive, so the expected value is normalised first.
    $expected = strtolower(trim((string) $hash));
    if ($data_hash === false || $data_hash !== $expected) {
        if (is_file($tmp)) {
            unlink($tmp);
        }
        die('Hash mismatch: ' . $data_hash . ' != ' . $hash . PHP_EOL);
    }

    set_cache($file, file_get_contents($tmp), $algo, $hash);

    // The verified archive now lives in the cache; drop the temporary copy.
    unlink($tmp);
}
|
||||
|
||||
/**
 * Cache and hash every WordPress release listed on the releases page.
 *
 * Ignored releases are: beta, RC, strayhorn, mingus, delta, gold and mu
 * (filtered out by the regular expression) and 1.0.2 (no sha1 available).
 *
 * @param resource $fp Open, writable handle of the combined whitelist file.
 */
function fetch_wordpress($fp)
{
    echo 'Fetching Wordpress' . PHP_EOL;

    $html = fetch('https://wordpress.org/download/releases/');
    preg_match_all(
        '/<a href="(https:\/\/wordpress\.org\/(wordpress\-([0-9.]+)\.tar\.gz))">/',
        $html,
        $matches
    );

    // $matches[1] holds the full download URLs, $matches[2] the archive names.
    foreach ($matches[2] as $idx => $archive) {
        if ($archive != 'wordpress-1.0.2.tar.gz') {
            $url = $matches[1][$idx];
            // The expected checksum is published next to the archive as "<url>.sha1".
            fetch_archive($archive, $url, null, 'sha1', $url . '.sha1');
            hash_archive($fp, $archive);
        }
    }
}
|
||||
|
||||
/**
 * Cache and hash every stable Typo3 release from the get.typo3.org JSON feed.
 *
 * Ignores: snapshots, rc, beta, alpha, plus a few versions whose archives
 * are missing or damaged.
 *
 * @param resource $fp Open, writable handle of the combined whitelist file.
 */
function fetch_typo3($fp)
{
    echo 'Fetching Typo3' . PHP_EOL;

    $data = json_decode(fetch('https://get.typo3.org/json'));
    if (!is_object($data) && !is_array($data)) {
        // json_decode() yields null for an empty or invalid response;
        // iterating that would only raise a warning.
        echo 'Unable to decode Typo3 release list' . PHP_EOL;
        return;
    }

    $prerelease_markers = ['snapshot', 'rc', 'beta', 'alpha'];
    // 4.6.0 and 4.5.33: "The specified blob does not exist."
    // 3.3.0 is damaged archive
    $unavailable = ['4.6.0', '4.5.33', '3.3.0'];

    foreach ($data as $value) {
        if (!isset($value->releases)) {
            continue;
        }
        foreach ($value->releases as $release) {
            $is_prerelease = false;
            foreach ($prerelease_markers as $marker) {
                if (strpos($release->version, $marker) !== false) {
                    $is_prerelease = true;
                    break;
                }
            }
            if ($is_prerelease || in_array($release->version, $unavailable, true)) {
                continue;
            }

            // NOTE(review): 'type3-' looks like a typo for 'typo3-', but it is
            // the name existing cache entries are stored under, so it is kept.
            $file = 'type3-' . $release->version . '.tar.gz';
            fetch_archive($file, $release->url->tar, $release->checksums->tar->sha1, 'sha1');
            hash_archive($fp, $file);
        }
    }
}
|
||||
|
||||
/**
 * Cache and hash the Pagekit releases announced by the update API.
 *
 * @param resource $fp Open, writable handle of the combined whitelist file.
 */
function fetch_pagekit($fp)
{
    echo 'Fetching Pagekit' . PHP_EOL;

    $data = json_decode(fetch('https://pagekit.com/api/update'));
    if (!is_object($data) && !is_array($data)) {
        // json_decode() yields null for an empty or invalid response.
        echo 'Unable to decode Pagekit release list' . PHP_EOL;
        return;
    }

    foreach ($data as $k => $releases) {
        // The "latest" entry is a single release; wrap it so it can be handled
        // like the release lists. Strict comparison keeps an integer key 0
        // from matching 'latest' on PHP < 8.
        if ($k === 'latest') {
            $releases = [$releases];
        }
        if (!is_array($releases) && !is_object($releases)) {
            continue;
        }
        foreach ($releases as $release) {
            $file = 'pagekit-' . $release->version . '.tar.gz';
            fetch_archive($file, $release->url, $release->shasum, 'sha1');
            hash_archive($fp, $file);
        }
    }
}
|
||||
|
||||
/**
 * Cache and hash the Drupal core releases of the given API versions.
 *
 * Ignored releases are: alpha, beta, rc, dev (only purely numeric version
 * numbers match the archive pattern).
 *
 * @param resource $fp       Open, writable handle of the combined whitelist file.
 * @param array    $versions Map of version label => drupal.org api_version id.
 */
function fetch_drupal($fp, $versions)
{
    foreach ($versions as $label => $api_id) {
        echo 'Fetching Drupal ' . $label . PHP_EOL;

        // Index of the last result page; unknown until a "last" link is seen.
        $last_page = false;

        for ($page = 0; ; $page++) {
            $html = fetch('https://www.drupal.org/project/drupal/releases?api_version%5B%5D=' . $api_id . '&page=' . $page);

            // pagination init
            if ($last_page === false && preg_match('/&page=(\d+)">last »<\/a>/', $html, $pager)) {
                $last_page = $pager[1];
            }

            preg_match_all(
                '/data-th="Download">(.*?)<a href="(https:\/\/ftp\.drupal\.org\/files\/projects\/(drupal\-([0-9.]+)\.tar\.gz)).*?md5 hash">\s*([a-z0-9]{32})\s*<\/td>/is',
                $html,
                $rows
            );
            // $rows[3] = archive name, $rows[2] = download URL, $rows[5] = md5 hash.
            foreach ($rows[3] as $idx => $archive) {
                fetch_archive($archive, $rows[2][$idx], $rows[5][$idx], 'md5');
                hash_archive($fp, $archive);
            }

            // Stop when there is no pagination or the last page was just handled.
            if ($last_page === false || $page + 1 > $last_page) {
                break;
            }
        }
    }
}
|
||||
|
||||
/**
 * Cache and hash the stable full packages of the given Joomla series.
 *
 * @param resource $fp       Open, writable handle of the combined whitelist file.
 * @param array    $versions Map of version label => numeric suffix used in the
 *                           downloads.joomla.org "/cms/joomla<suffix>" path.
 */
function fetch_joomla($fp, $versions)
{
    foreach ($versions as $label => $suffix) {
        echo 'Fetching Joomla ' . $label . PHP_EOL;

        $index = fetch('https://downloads.joomla.org/cms/joomla' . $suffix);
        preg_match_all('/href="(\/cms\/joomla\d+\/(\d+\-\d+\-\d+))"/', $index, $found);

        // $found[1] = release page path, $found[2] = dash separated version.
        foreach ($found[1] as $idx => $path) {
            $release = $found[2][$idx];
            $archive = 'joomla_' . $release . '-stable-full_package.tar.gz';

            // pre check because we need hash information
            if (!is_cached($archive)) {
                $page = fetch('https://downloads.joomla.org' . $path);

                $signature_pattern = '/Joomla! '.str_replace('-', '\.', $release).' Full Package \(\.tar\.gz\).*?SHA1 Signature\s*<\/dt>\s*<dd>\s*([a-z0-9]{40})\s*<\/dd>/is';
                if (!preg_match($signature_pattern, $page, $signature)) {
                    echo 'Unable to find SHA1 signature for version ' . $release . PHP_EOL;
                    // Gives up on the remaining releases of this series.
                    break;
                }

                $link_pattern = '/href="('.preg_quote($path, '/').'\/.*?format=gz)"/';
                if (!preg_match($link_pattern, $page, $link)) {
                    echo 'Unable to find archive url for version ' . $release . PHP_EOL;
                    // Gives up on the remaining releases of this series.
                    break;
                }

                fetch_archive($archive, 'https://downloads.joomla.org' . $link[1], $signature[1], 'sha1');
            }

            hash_archive($fp, $archive);
        }
    }
}
|
||||
|
||||
// The cache directory may be passed as the single command line argument;
// otherwise a "cache" directory next to this script is used.
$cache_dir = ($argc == 2) ? $argv[1] : __DIR__ . '/cache';
|
||||
|
||||
if (!is_readable($cache_dir)) {
|
||||
if (!mkdir($cache_dir)) {
|
||||
die('Unable to create cache directory');
|
||||
@@ -84,8 +264,32 @@ if (!is_readable($cache_dir)) {
|
||||
|
||||
// Combined whitelist: every source below appends one MD5 hash per line.
$fp = fopen('all.txt', 'w');
if ($fp === false) {
    // Without this check every fetch_* call below would write to a non-resource.
    die('Unable to open all.txt for writing' . PHP_EOL);
}

// fetch_kubik_rubik($fp);
// TODO https://modx.com/download/other-downloads
// TODO wordpress plugins only popular ones

fetch_jquery($fp);
fetch_wordpress($fp);
fetch_typo3($fp);
fetch_pagekit($fp);
fetch_drupal(
    $fp,
    [
        // version label => drupal.org api_version id
        '9.x' => 39794,
        '8.x' => 7234,
        '7.x' => 103,
        '6.x' => 87,
        '5.x' => 78,
        '4.7.x' => 79,
        '4.6.x' => 80,
        '4.5.x' => 81,
        '4.4.x' => 82,
        '4.3.x' => 83,
        '4.2.x' => 84,
        '4.1.x' => 85,
        '4.0.x' => 86
    ]
);
// version label => suffix of the downloads.joomla.org "/cms/joomla<suffix>" path
fetch_joomla($fp, ['3.0' => 3, '2.5' => 25, '1.5' => 15, '1.0' => 10]);

fclose($fp);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user