Working with pre-generated big hash database for whitelisting

This commit is contained in:
Gabor Gyorvari
2018-12-10 13:02:03 +01:00
parent ed5bc006e4
commit 5cf90cd371
2 changed files with 149 additions and 7 deletions

115
scan.php
View File

@@ -39,6 +39,7 @@ class MalwareScanner
private $flagFollowSymlink = false;
private $flagLineNumber = false;
private $flagScanEverything = false;
// Set through setFlagBigData() (the --big-data option); when true, run() loads the big data
// hash database and inWhitelist() consults it before the regular whitelist.
private $flagBigData = false;
private $outputFormat = '';
private $whitelist = array();
private $ignore = array();
@@ -54,6 +55,8 @@ class MalwareScanner
private $patterns_re = array();
private $patterns_b64functions = array();
private $patterns_b64keywords = array();
// Hashes loaded by updateBigData(): one entry per non-empty line of the decompressed database.
private $bigdata = array();
// After updateBigData() runs this holds the index of the last entry in $bigdata
// (entry count - 1), because it is passed to binarySearch() as the upper bound.
private $bigdata_count = 0;
/**
* MalwareScanner constructor.
@@ -124,6 +127,11 @@ class MalwareScanner
/**
 * Checks whether a file checksum is whitelisted.
 *
 * When the big data flag is set, the pre-generated hash database is searched first
 * (binary search); on a miss, or when the flag is off, the regular whitelist is searched.
 *
 * @param string $hash Checksum to look up.
 * @return bool True when the checksum is present in either list.
 */
private function inWhitelist($hash)
{
    // binarySearch() returns false on a miss and an integer index (possibly 0) on a hit,
    // so the result must be tested with a strict comparison.
    if ($this->flagBigData
        && $this->binarySearch($hash, $this->bigdata, $this->bigdata_count) !== false
    ) {
        return true;
    }

    // Strict comparison on purpose: compared loosely, two different hex digests of the form
    // "0e<digits>" are both numeric strings equal to zero and would match each other.
    // NOTE(review): assumes the whitelist entries are strings - confirm against loadWhitelist().
    return in_array($hash, $this->whitelist, true);
}
@@ -221,7 +229,8 @@ class MalwareScanner
'line-number',
'output-format:',
'wordpress-version:',
'scan-everything'
'scan-everything',
'big-data'
)
);
@@ -301,6 +310,9 @@ class MalwareScanner
if (isset($options['scan-everything']) || isset($options['E'])) {
$this->setFlagScanEverything(true);
}
if (isset($options['big-data'])) {
$this->setFlagBigData(true);
}
}
public function setExtensions(array $a)
@@ -384,6 +396,11 @@ class MalwareScanner
$this->flagScanEverything = $b;
}
/**
 * Sets the big data flag, which controls whether the big data hash database
 * is loaded in run() and consulted in inWhitelist().
 *
 * @param bool $b Value stored in the flag.
 */
public function setFlagBigData($b)
{
$this->flagBigData = $b;
}
// @see http://stackoverflow.com/a/13914119
private function pathMatches($path, $pattern, $ignoreCase = false)
{
@@ -557,24 +574,33 @@ class MalwareScanner
echo 'Total malware identified: ' . $this->stat['files_infected'] . PHP_EOL;
}
//Validates the input directory
//Calls the load pattern and load whitelist functions
//Calls the process and report functions.
/**
* Validates the input directory
*
* - Calls the load pattern and load whitelist functions
* - Fetch and load big data white list
* - Calls the process and report functions.
*
* @param $dir
* @return bool
*/
public function run($dir)
{
//Make sure the input is a valid directory path.
// Make sure the input is a valid directory path.
$dir = rtrim($dir, '/');
if (!is_dir($dir)) {
$this->error('Specified path is not a directory: ' . $dir);
return false;
}
//Load Patterns
$this->initializePatterns();
//Load Whitelist
$this->loadWhitelist();
if ($this->flagBigData && !$this->updateBigData()) {
return false;
}
$start = time();
$this->process($dir . '/');
$this->report($start, $dir . '/');
@@ -674,6 +700,80 @@ class MalwareScanner
}
}
/**
 * Iterative binary search for a string inside a sorted list of strings.
 *
 * @see https://www.mkwd.net/binary-search-algorithm-in-php/
 *
 * @param string $needle   Value to look for.
 * @param array  $haystack Strings ordered the way strcmp() orders them.
 * @param int    $high     Highest index to consider (inclusive).
 * @param int    $low      Lowest index to consider (inclusive).
 * @return int|false Index of the matching entry, or false when there is no match.
 */
private function binarySearch($needle, array $haystack, $high, $low = 0)
{
    for (;;) {
        // An empty range means every candidate has been ruled out.
        if ($high < $low) {
            return false;
        }

        // Probe the element in the middle of the remaining range.
        $middle = (int)floor(($low + $high) / 2);
        $order = strcmp($needle, $haystack[$middle]);

        if ($order === 0) {
            return $middle;
        }

        // Negative: the needle sorts before the probe, keep the lower half;
        // positive: it sorts after the probe, keep the upper half.
        if ($order < 0) {
            $high = $middle - 1;
        } else {
            $low = $middle + 1;
        }
    }
}
/**
 * Downloads (when needed) and loads the pre-generated whitelist hash database.
 *
 * - Fetches the published SHA-256 checksum of the compressed database. Either a bare digest
 *   or a "digest  filename" line is accepted; only the first token is used.
 * - Reuses the local bigdata.dat when its checksum matches; otherwise downloads a fresh copy
 *   and verifies it before it replaces the local file.
 * - Decompresses the database and stores every non-empty line in $this->bigdata.
 *
 * $this->bigdata_count is set to the last valid index (number of hashes - 1) because it is
 * handed to binarySearch() as the upper bound.
 *
 * @return bool True when the database was loaded; false on any download, integrity,
 *              write or decode failure (an error is reported through $this->error()).
 */
private function updateBigData()
{
    // NOTE(review): hard-coded loopback endpoint - looks like a development placeholder; confirm.
    $url = 'http://127.0.0.1:4000';
    $file = __DIR__ . '/bigdata.dat';

    // Test for failure before trimming: trim(false) yields '' and would hide the error.
    $checksum = file_get_contents($url . '/database/compressed.sha256');
    if ($checksum === false) {
        $this->error('Unable to download database checksum');
        return false;
    }
    $tokens = preg_split('/\s+/', trim($checksum));
    $latest_hash = strtolower($tokens[0]); // hash_file()/hash() produce lowercase hex

    // Strict comparison: loosely compared, "0e<digits>" digests are numeric strings.
    $download = !is_readable($file) || hash_file('sha256', $file) !== $latest_hash;

    if ($download) {
        $data = file_get_contents($url . '/database/compressed.dat');
        if ($data === false) {
            $this->error('Unable to download database');
            return false;
        }
        // Verify the payload before it overwrites the local copy, and never load
        // a database that does not match the published checksum.
        if (hash('sha256', $data) !== $latest_hash) {
            $this->error('Downloaded database hash mismatch');
            return false;
        }
        if (file_put_contents($file, $data) === false) {
            $this->error('Unable to write database file: ' . $file);
            return false;
        }
    }

    // Reuse the freshly downloaded bytes when available instead of re-reading the file.
    $raw = $download ? $data : file_get_contents($file);
    $content = ($raw === false) ? false : gzdecode($raw);
    if ($content === false) {
        $this->error('Unable to read database file: ' . $file);
        return false;
    }

    $this->bigdata = [];
    foreach (explode("\n", $content) as $line) { // faster than strtok, but needs more memory
        // Compare against '' explicitly so a line consisting of "0" is not dropped.
        if ($line !== '') {
            $this->bigdata[] = $line;
        }
    }

    $total = count($this->bigdata);
    $this->bigdata_count = $total - 1; // -1 because we use indexes in binary search
    echo 'Big data loaded hash count: ' . $total . PHP_EOL;

    return true;
}
//Prints out the usage menu options.
private function showHelp()
{
@@ -698,6 +798,7 @@ class MalwareScanner
echo ' -L --line-number Display matching pattern line number in file' . PHP_EOL;
echo ' -o --output-format Custom defined output format' . PHP_EOL;
echo ' -j --wordpress-version Version of wordpress to get md5 signatures' . PHP_EOL;
echo ' --big-data General whitelist' . PHP_EOL;
}

View File

@@ -0,0 +1,41 @@
<?php
/**
 * Builds the compressed whitelist hash database.
 *
 * - Downloads the per-release checksum JSON files listed on checksums.kubik-rubik.de
 *   (cached under ./cache next to this script).
 * - Writes every hash value to all.txt, de-duplicates and sorts it into unique.txt,
 *   then gzips the result into compressed.dat.
 *
 * all.txt, unique.txt and compressed.dat are created in the current working directory.
 */

$baseUrl = 'https://checksums.kubik-rubik.de';
$cacheDir = __DIR__ . '/cache';

if (!is_dir($cacheDir) && !mkdir($cacheDir)) {
    die('Unable to create cache directory');
}

echo 'Fetching checksums.kubik-rubik.de' . PHP_EOL;
$data = file_get_contents($baseUrl);
if ($data === false) {
    // Without the index page no JSON links can be found; stop instead of emitting an empty database.
    die('Unable to fetch ' . $baseUrl);
}

$fp = fopen('all.txt', 'w');
if ($fp === false) {
    die('Unable to open all.txt for writing');
}

// excluded: contao
foreach (['drupal', 'joomla', 'pagekit', 'typo3', 'wordpress'] as $type) {
    preg_match_all('/<a class="btn btn-success" href="(' . $type . '\/.*?)">\s+JSON.*?<\/a>/is', $data, $m);
    foreach ($m[1] as $url) {
        $file = str_replace(['/', '.'], '_', $url); // fix file name
        $cacheFile = $cacheDir . '/' . $file . '.json';

        if (!is_readable($cacheFile)) {
            echo 'Downloading: ' . $baseUrl . '/' . $url . PHP_EOL;
            $json = file_get_contents($baseUrl . '/' . $url);
            if ($json === false) {
                // Do not cache a failed download, so the next run retries it.
                echo 'Download failed, skipping: ' . $url . PHP_EOL;
                continue;
            }
            file_put_contents($cacheFile, $json);
        } else {
            $json = file_get_contents($cacheFile);
        }

        // decode json, write hash values to one file
        echo 'Parsing json file: ' . $file . PHP_EOL;
        $hashes = is_string($json) ? json_decode($json, true) : null;
        if (!is_array($hashes)) {
            echo 'Invalid json, skipping: ' . $file . PHP_EOL;
            continue;
        }
        foreach ($hashes as $hash) {
            // Only plain, non-empty string values are written out.
            if (is_string($hash) && $hash !== '') {
                fputs($fp, $hash . "\n");
            }
        }
    }
}
fclose($fp);

echo 'Create unique database' . PHP_EOL;
// LC_ALL=C forces byte-order sorting, matching the strcmp()-based binary search in the scanner.
exec('LC_ALL=C sort -u -o unique.txt all.txt', $output, $status);
if ($status !== 0) {
    die('Unable to create unique.txt (sort exited with status ' . $status . ')');
}

echo 'Compressing unique.txt' . PHP_EOL;
exec('gzip < unique.txt > compressed.dat', $output, $status); // gzencode
if ($status !== 0) {
    die('Unable to create compressed.dat (gzip exited with status ' . $status . ')');
}