mirror of
https://github.com/scr34m/php-malware-scanner.git
synced 2026-06-16 12:30:35 +00:00
Working with a pre-generated big hash database for whitelisting
This commit is contained in:
41
tools/bigdata/generate.php
Normal file
41
tools/bigdata/generate.php
Normal file
@@ -0,0 +1,41 @@
|
||||
<?php

/**
 * Builds the whitelist hash database from checksums.kubik-rubik.de.
 *
 * Steps:
 *  1. Scrape the index page for the JSON download links of each listed CMS.
 *  2. Download every linked JSON document once and keep a copy in __DIR__/cache.
 *  3. Append every scalar value of each decoded document to all.txt, one per line.
 *  4. Run `sort -u` to produce unique.txt, then gzip it into compressed.dat.
 *
 * all.txt, unique.txt and compressed.dat are written relative to the current
 * working directory; only the download cache lives next to this script.
 */

$baseUrl = 'https://checksums.kubik-rubik.de';
$cacheDir = __DIR__ . '/cache';

// Print a diagnostic on stderr so it does not get mixed into the progress output.
$warn = function ($message) {
    file_put_contents('php://stderr', $message . PHP_EOL);
};

// Print a diagnostic and stop with a non-zero exit status so callers can detect the failure.
$fail = function ($message) use ($warn) {
    $warn($message);
    exit(1);
};

// The second is_dir() covers the case where the directory appeared between the check and mkdir().
if (!is_dir($cacheDir) && !mkdir($cacheDir) && !is_dir($cacheDir)) {
    $fail('Unable to create cache directory');
}

echo 'Fetching checksums.kubik-rubik.de' . PHP_EOL;
$data = file_get_contents($baseUrl);
if ($data === false) {
    $fail('Unable to fetch ' . $baseUrl);
}

$fp = fopen('all.txt', 'w');
if ($fp === false) {
    $fail('Unable to open all.txt for writing');
}

// excluded: contao
foreach (['drupal', 'joomla', 'pagekit', 'typo3', 'wordpress'] as $type) {
    // Capture the href of every "JSON" download button whose path starts with "<type>/".
    $pattern = '/<a class="btn btn-success" href="(' . preg_quote($type, '/') . '\/.*?)">\s+JSON.*?<\/a>/is';
    if (!preg_match_all($pattern, $data, $m)) {
        // No links for this type (or the regex engine failed): nothing to process.
        continue;
    }

    foreach ($m[1] as $url) {
        $file = str_replace(['/', '.'], '_', $url); // fix file name
        $cacheFile = $cacheDir . '/' . $file . '.json';

        $downloaded = false;
        if (is_readable($cacheFile)) {
            $json = file_get_contents($cacheFile);
        } else {
            echo 'Downloading: ' . $baseUrl . '/' . $url . PHP_EOL;
            $json = file_get_contents($baseUrl . '/' . $url);
            $downloaded = true;
        }

        if ($json === false) {
            $warn('Unable to read ' . ($downloaded ? $baseUrl . '/' . $url : $cacheFile) . ', skipping');
            continue;
        }

        // decode json, write hash values to one file
        echo 'Parsing json file: ' . $file . PHP_EOL;
        $hashes = json_decode($json, true);
        if (!is_array($hashes)) {
            $warn('Invalid JSON in ' . $file . ', skipping');
            continue;
        }

        // Cache only after the payload decoded successfully, so a failed or
        // truncated download is retried on the next run instead of being reused.
        if ($downloaded && file_put_contents($cacheFile, $json) === false) {
            $warn('Unable to write cache file ' . $cacheFile);
        }

        foreach ($hashes as $hash) {
            // Nested structures cannot be written as a single line; skip them.
            if (is_scalar($hash)) {
                fwrite($fp, $hash . "\n");
            }
        }
    }
}

fclose($fp);

echo 'Create unique database' . PHP_EOL;
exec('sort -u -o unique.txt all.txt', $output, $status);
if ($status !== 0) {
    $fail('sort failed with exit status ' . $status);
}

echo 'Compressing unique.txt' . PHP_EOL;
// gzip(1) writes the same format as PHP's gzencode().
exec('gzip < unique.txt > compressed.dat', $output, $status);
if ($status !== 0) {
    $fail('gzip failed with exit status ' . $status);
}
|
||||
Reference in New Issue
Block a user