mirror of
https://github.com/scr34m/php-malware-scanner.git
synced 2026-06-16 12:30:35 +00:00
Merge branch 't43'
This commit is contained in:
114
scan.php
114
scan.php
@@ -39,6 +39,7 @@ class MalwareScanner
|
|||||||
private $flagFollowSymlink = false;
|
private $flagFollowSymlink = false;
|
||||||
private $flagLineNumber = false;
|
private $flagLineNumber = false;
|
||||||
private $flagScanEverything = false;
|
private $flagScanEverything = false;
|
||||||
|
private $flagCombinedWhitelist = false;
|
||||||
private $outputFormat = '';
|
private $outputFormat = '';
|
||||||
private $whitelist = array();
|
private $whitelist = array();
|
||||||
private $ignore = array();
|
private $ignore = array();
|
||||||
@@ -54,6 +55,8 @@ class MalwareScanner
|
|||||||
private $patterns_re = array();
|
private $patterns_re = array();
|
||||||
private $patterns_b64functions = array();
|
private $patterns_b64functions = array();
|
||||||
private $patterns_b64keywords = array();
|
private $patterns_b64keywords = array();
|
||||||
|
private $combined_whitelist = array();
|
||||||
|
private $combined_whitelist_count = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MalwareScanner constructor.
|
* MalwareScanner constructor.
|
||||||
@@ -124,6 +127,11 @@ class MalwareScanner
|
|||||||
/**
 * Tells whether an md5 checksum is whitelisted.
 *
 * When the combined whitelist flag is set, the combined whitelist is consulted
 * first through binarySearch(); the regular whitelist is checked afterwards.
 *
 * @param string $hash md5 checksum to look up
 * @return bool true when the checksum is found in either whitelist
 */
private function inWhitelist($hash)
{
    if ($this->flagCombinedWhitelist) {
        // binarySearch() yields the matching index (possibly 0) or false when
        // nothing matches; test for a real index instead of relying on how
        // PHP compares a boolean with -1.
        $index = $this->binarySearch($hash, $this->combined_whitelist, $this->combined_whitelist_count);
        if (is_int($index) && $index >= 0) {
            return true;
        }
    }

    // Strict comparison on purpose: a loose in_array() compares numeric-looking
    // strings as numbers, so two different checksums of the form "0e<digits>"
    // (or made of digits only) could be reported as equal.
    return in_array($hash, $this->whitelist, true);
}
|
||||||
|
|
||||||
@@ -221,7 +229,8 @@ class MalwareScanner
|
|||||||
'line-number',
|
'line-number',
|
||||||
'output-format:',
|
'output-format:',
|
||||||
'wordpress-version:',
|
'wordpress-version:',
|
||||||
'scan-everything'
|
'scan-everything',
|
||||||
|
'combined-whitelist'
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -301,6 +310,9 @@ class MalwareScanner
|
|||||||
if (isset($options['scan-everything']) || isset($options['E'])) {
|
if (isset($options['scan-everything']) || isset($options['E'])) {
|
||||||
$this->setFlagScanEverything(true);
|
$this->setFlagScanEverything(true);
|
||||||
}
|
}
|
||||||
|
if (isset($options['combined-whitelist'])) {
|
||||||
|
$this->setFlagCombinedWhitelist(true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public function setExtensions(array $a)
|
public function setExtensions(array $a)
|
||||||
@@ -384,6 +396,11 @@ class MalwareScanner
|
|||||||
$this->flagScanEverything = $b;
|
$this->flagScanEverything = $b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Switches use of the combined whitelist on or off.
 *
 * @param bool $b new value of the flag
 */
public function setFlagCombinedWhitelist($b)
{
    $this->flagCombinedWhitelist = $b;
}
|
||||||
|
|
||||||
// @see http://stackoverflow.com/a/13914119
|
// @see http://stackoverflow.com/a/13914119
|
||||||
private function pathMatches($path, $pattern, $ignoreCase = false)
|
private function pathMatches($path, $pattern, $ignoreCase = false)
|
||||||
{
|
{
|
||||||
@@ -558,24 +575,33 @@ class MalwareScanner
|
|||||||
echo 'Total malware identified: ' . $this->stat['files_infected'] . PHP_EOL;
|
echo 'Total malware identified: ' . $this->stat['files_infected'] . PHP_EOL;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Validates the input directory
|
/**
|
||||||
//Calls the load pattern and load whitelist functions
|
* Validates the input directory
|
||||||
//Calls the process and report functions.
|
*
|
||||||
|
* - Calls the load pattern and load whitelist functions
|
||||||
|
* - Fetch and load combined whitelist
|
||||||
|
* - Calls the process and report functions.
|
||||||
|
*
|
||||||
|
* @param $dir
|
||||||
|
* @return bool
|
||||||
|
*/
|
||||||
public function run($dir)
|
public function run($dir)
|
||||||
{
|
{
|
||||||
//Make sure the input is a valid directory path.
|
// Make sure the input is a valid directory path.
|
||||||
$dir = rtrim($dir, '/');
|
$dir = rtrim($dir, '/');
|
||||||
if (!is_dir($dir)) {
|
if (!is_dir($dir)) {
|
||||||
$this->error('Specified path is not a directory: ' . $dir);
|
$this->error('Specified path is not a directory: ' . $dir);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Load Patterns
|
|
||||||
$this->initializePatterns();
|
$this->initializePatterns();
|
||||||
|
|
||||||
//Load Whitelist
|
|
||||||
$this->loadWhitelist();
|
$this->loadWhitelist();
|
||||||
|
|
||||||
|
if ($this->flagCombinedWhitelist && !$this->updateCombinedWhitelist()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
$start = time();
|
$start = time();
|
||||||
$this->process($dir . '/');
|
$this->process($dir . '/');
|
||||||
$this->report($start, $dir . '/');
|
$this->report($start, $dir . '/');
|
||||||
@@ -671,6 +697,79 @@ class MalwareScanner
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Searches $haystack for $needle by repeatedly halving the index range [$low, $high].
 *
 * Elements are compared with strcmp().
 *
 * @see https://www.mkwd.net/binary-search-algorithm-in-php/
 *
 * @param string $needle   value to look for
 * @param array  $haystack list to search; NOTE(review): presumably has to be sorted
 *                         in strcmp() order - confirm where the list is produced
 * @param int    $high     highest index to consider
 * @param int    $low      lowest index to consider
 * @return int|false index of the matching element, or false when there is no match
 */
private function binarySearch($needle, array $haystack, $high, $low = 0)
{
    // An empty range means the needle is not present.
    while ($low <= $high) {
        // Middle of the remaining range, rounded down.
        $middle = (int)floor(($high + $low) / 2);
        $order = strcmp($needle, $haystack[$middle]);

        if ($order < 0) {
            // Needle sorts before the middle element: keep the lower part.
            $high = $middle - 1;
            continue;
        }

        if ($order > 0) {
            // Needle sorts after the middle element: keep the upper part.
            $low = $middle + 1;
            continue;
        }

        return $middle;
    }

    return false;
}
|
||||||
|
|
||||||
|
/**
 * Brings the local combined whitelist up to date and loads it into memory.
 *
 * The published SHA-256 checksum is fetched first. The database is downloaded
 * only when whitelist.dat is missing/unreadable or its checksum differs from the
 * published one. The gzip-compressed data is then decoded and every non-empty
 * line is stored in $this->combined_whitelist.
 *
 * @param string $url base URL that serves database/compressed.sha256 and database/compressed.dat
 * @return bool true when the whitelist was loaded, false on a download, integrity,
 *              write, read or decoding failure
 */
private function updateCombinedWhitelist($url = 'https://scr34m.github.io/php-malware-scanner')
{
    // Check for failure before trimming: trim(false) is '' and would hide the error.
    $checksum = file_get_contents($url . '/database/compressed.sha256');
    if ($checksum === false || trim($checksum) === '') {
        $this->error('Unable to download database checksum');
        return false;
    }
    $latest_hash = trim($checksum);

    $file = __DIR__ . '/whitelist.dat';

    // Download when there is no usable local copy or the local copy is outdated.
    $download = !is_readable($file) || hash_file('sha256', $file) !== $latest_hash;

    if ($download) {
        $data = file_get_contents($url . '/database/compressed.dat');
        if ($data === false) {
            $this->error('Unable to download database');
            return false;
        }

        // Verify the payload before storing it so that a corrupt download is
        // neither written to disk nor loaded as a whitelist.
        if (hash('sha256', $data) !== $latest_hash) {
            $this->error('Downloaded database hash mismatch');
            return false;
        }

        if (file_put_contents($file, $data) === false) {
            $this->error('Unable to write database file: ' . $file);
            return false;
        }
    } else {
        $data = file_get_contents($file);
        if ($data === false) {
            $this->error('Unable to read database file: ' . $file);
            return false;
        }
    }

    $content = gzdecode($data);
    if ($content === false) {
        $this->error('Unable to decode database');
        return false;
    }

    $this->combined_whitelist = [];
    foreach (explode("\n", $content) as $line) { // faster than strtok, but needs more memory
        if ($line !== '') {
            $this->combined_whitelist[] = $line;
        }
    }

    // Kept as the highest valid index (record count - 1) because the value is
    // handed to binarySearch() as the upper bound of the index range.
    $this->combined_whitelist_count = count($this->combined_whitelist) - 1;
    echo 'Combined whitelist records count: ' . ($this->combined_whitelist_count + 1) . PHP_EOL;

    return true;
}
|
||||||
|
|
||||||
//Prints out the usage menu options.
|
//Prints out the usage menu options.
|
||||||
private function showHelp()
|
private function showHelp()
|
||||||
{
|
{
|
||||||
@@ -695,6 +794,7 @@ class MalwareScanner
|
|||||||
echo ' -L --line-number Display matching pattern line number in file' . PHP_EOL;
|
echo ' -L --line-number Display matching pattern line number in file' . PHP_EOL;
|
||||||
echo ' -o --output-format Custom defined output format' . PHP_EOL;
|
echo ' -o --output-format Custom defined output format' . PHP_EOL;
|
||||||
echo ' -j --wordpress-version Version of wordpress to get md5 signatures' . PHP_EOL;
|
echo ' -j --wordpress-version Version of wordpress to get md5 signatures' . PHP_EOL;
|
||||||
|
echo ' --combined-whitelist Combined whitelist' . PHP_EOL;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
92
tools/bigdata/generate.php
Normal file
92
tools/bigdata/generate.php
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
<?php
|
||||||
|
// Directory next to this script that holds the files kept by set_cache() and read back by get_cache().
$cache_dir = __DIR__ . '/cache';
|
||||||
|
|
||||||
|
/**
 * Checks whether a file exists and is readable inside the cache directory.
 *
 * @param string $file file name relative to the cache directory
 * @return bool true when the cached file can be read
 */
function is_cached($file)
{
    $path = $GLOBALS['cache_dir'] . '/' . $file;

    return is_readable($path);
}
|
||||||
|
|
||||||
|
/**
 * Stores data under the given name inside the cache directory.
 *
 * @param string       $file file name relative to the cache directory
 * @param string|false $data content to store; false (a failed download) is ignored
 */
function set_cache($file, $data)
{
    global $cache_dir;

    // file_get_contents() reports a failed download as false. Writing that would
    // create an empty cache entry which is_cached() then accepts as a valid hit
    // on every later run.
    if ($data === false) {
        return;
    }

    file_put_contents($cache_dir . '/' . $file, $data);
}
|
||||||
|
|
||||||
|
/**
 * Reads a file back from the cache directory.
 *
 * @param string $file file name relative to the cache directory
 * @return string|false the file content, or whatever file_get_contents() reports on failure
 */
function get_cache($file)
{
    $path = $GLOBALS['cache_dir'] . '/' . $file;

    return file_get_contents($path);
}
|
||||||
|
|
||||||
|
/**
 * Collects the checksums published on checksums.kubik-rubik.de and appends them to $fp.
 *
 * The index page is scanned for the JSON links of each listed project type. Every
 * JSON file is downloaded once, kept in the cache, decoded, and each string value
 * it contains is written to $fp on its own line.
 *
 * @param resource $fp writable stream that receives one hash per line
 */
function fetch_kubik_rubik($fp)
{
    echo 'Fetching checksums.kubik-rubik.de' . PHP_EOL;
    $data = file_get_contents('https://checksums.kubik-rubik.de');
    if ($data === false) {
        // Without the index page there is nothing to scan for links.
        echo 'Unable to fetch checksums.kubik-rubik.de' . PHP_EOL;
        return;
    }

    // excluded: contao
    foreach (['drupal', 'joomla', 'pagekit', 'typo3', 'wordpress'] as $type) {
        preg_match_all('/<a class="btn btn-success" href="(' . $type . '\/.*?)">\s+JSON.*?<\/a>/is', $data, $m);
        foreach ($m[1] as $url) {
            $file = str_replace(['/', '.'], '_', $url); // fix file name
            if (!is_cached($file . '.json')) {
                echo 'Downloading: ' . 'https://checksums.kubik-rubik.de/' . $url . PHP_EOL;
                $json = file_get_contents('https://checksums.kubik-rubik.de/' . $url);
                if ($json === false) {
                    // Do not cache or parse a failed download; a later run can retry it.
                    echo 'Download failed, skipping: ' . $url . PHP_EOL;
                    continue;
                }
                set_cache($file . '.json', $json);
            } else {
                $json = get_cache($file . '.json');
            }

            // decode json, write hash values to one file
            echo 'Parsing json file: ' . $file . PHP_EOL;
            $hashes = is_string($json) ? json_decode($json, true) : null;
            if (!is_array($hashes)) {
                // json_decode() returns null for invalid input, which cannot be iterated.
                echo 'Invalid json, skipping: ' . $file . PHP_EOL;
                continue;
            }
            foreach ($hashes as $hash) {
                // Only plain string values can be written as a hash line.
                if (is_string($hash)) {
                    fputs($fp, $hash . "\n");
                }
            }
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Appends the md5 checksum of every jQuery file linked from code.jquery.com/jquery/ to $fp.
 *
 * The listing page is scanned for "open-sri-modal" links pointing at /jquery-*.js.
 * Each file is downloaded once, kept in the cache, and its md5 checksum is written
 * to $fp on its own line.
 *
 * @param resource $fp writable stream that receives one hash per line
 */
function fetch_jquery($fp)
{
    echo 'Fetching jQuery' . PHP_EOL;
    $listing = file_get_contents('https://code.jquery.com/jquery/');
    if ($listing === false) {
        // Without the listing page there is nothing to scan for links.
        echo 'Unable to fetch https://code.jquery.com/jquery/' . PHP_EOL;
        return;
    }

    preg_match_all('/<a class=\'open\-sri\-modal\' href=\'\/(jquery-.*?\.js)/', $listing, $m);
    foreach ($m[1] as $file) {
        if (!is_cached($file)) {
            echo 'Downloading: ' . 'https://code.jquery.com/' . $file . PHP_EOL;
            $source = file_get_contents('https://code.jquery.com/' . $file);
            if ($source === false) {
                // Skip failures: md5(false) equals md5(''), which would put the
                // checksum of an empty file into the whitelist.
                echo 'Download failed, skipping: ' . $file . PHP_EOL;
                continue;
            }
            set_cache($file, $source);
        } else {
            $source = get_cache($file);
            if ($source === false) {
                echo 'Unable to read cached file, skipping: ' . $file . PHP_EOL;
                continue;
            }
        }

        fputs($fp, md5($source) . "\n");
    }
}
|
||||||
|
|
||||||
|
// Create the cache directory when it does not exist yet.
if (!is_dir($cache_dir) && !mkdir($cache_dir)) {
    die('Unable to create cache directory');
}

// Collect every hash, one per line, in all.txt.
$fp = fopen('all.txt', 'w');
if ($fp === false) {
    die('Unable to open all.txt for writing');
}

fetch_kubik_rubik($fp);
fetch_jquery($fp);

fclose($fp);

echo 'Creating unique database' . PHP_EOL;
// LC_ALL=C makes sort order the lines byte-wise regardless of the user's locale,
// which is the order a strcmp()-based binary search over the list relies on.
exec('LC_ALL=C sort -u -o unique.txt all.txt', $output, $status);
if ($status !== 0) {
    die('Unable to create unique.txt');
}

echo 'Compressing unique.txt' . PHP_EOL;
exec('gzip < unique.txt > compressed.dat', $output, $status); // gzencode
if ($status !== 0) {
    die('Unable to create compressed.dat');
}

// Publish the checksum next to the database.
$hash = hash_file('sha256', 'compressed.dat');
if ($hash === false) {
    die('Unable to hash compressed.dat');
}
file_put_contents('compressed.sha256', $hash);
echo 'SHA256 is ' . $hash . PHP_EOL;
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import urllib2
|
|
||||||
import re
|
|
||||||
import hashlib
|
|
||||||
|
|
||||||
def fetch(url):
    """Return the body of the HTTP response for ``url``."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even when read() raises.
        response.close()
|
|
||||||
|
|
||||||
def main():
    """Print the MD5 digest and path of every /jquery-*.js file linked from the jQuery listing page."""
    html = fetch('https://code.jquery.com/jquery/')
    regex = re.compile(r"<a class='open\-sri\-modal' href='(/jquery-.*?\.js)'")
    for path in regex.findall(html):
        js = fetch("https://code.jquery.com" + path)
        # Named "digest" so the builtin hash() is not shadowed.
        digest = hashlib.md5(js).hexdigest()
        print(digest + " " + path)


if __name__ == '__main__':
    main()
|
|
||||||
Reference in New Issue
Block a user