Files
php-malware-scanner/tools/bigdata/generate.php

92 lines
2.5 KiB
PHP
Raw Normal View History

<?php
// Directory next to this script in which downloaded files are kept.
$cache_dir = sprintf('%s/cache', __DIR__);
/**
 * Tell whether an entry exists in the cache directory and can be read.
 *
 * @param string $file Entry name, relative to the global $cache_dir.
 * @return bool True when the resulting path is readable, false otherwise.
 */
function is_cached($file)
{
    global $cache_dir;

    $path = "{$cache_dir}/{$file}";

    return is_readable($path);
}
/**
 * Store data under the given entry name inside the cache directory.
 *
 * The result of the write is not reported back to the caller.
 *
 * @param string $file Entry name, relative to the global $cache_dir.
 * @param mixed  $data Content handed to file_put_contents().
 * @return void
 */
function set_cache($file, $data)
{
    global $cache_dir;

    $target = implode('/', [$cache_dir, $file]);
    file_put_contents($target, $data);
}
/**
 * Read an entry back from the cache directory.
 *
 * @param string $file Entry name, relative to the global $cache_dir.
 * @return string|false The stored content, or false when file_get_contents() fails.
 */
function get_cache($file)
{
    global $cache_dir;

    $source = sprintf('%s/%s', $cache_dir, $file);

    return file_get_contents($source);
}
/**
 * Collect the JSON checksum lists linked from checksums.kubik-rubik.de and
 * append every value they contain to an open stream, one value per line.
 *
 * Each list is stored through set_cache() after its first successful download
 * and read back through get_cache() afterwards. A download that fails or does
 * not decode to a JSON array/object is reported and skipped WITHOUT being
 * cached, and a cached copy that does not decode is downloaded again, so a
 * transient network error cannot leave a permanently broken cache entry.
 *
 * @param resource $fp Writable stream that receives the hash values.
 * @return void
 */
function fetch_kubik_rubik($fp)
{
    $base = 'https://checksums.kubik-rubik.de';

    // Decode a JSON document; anything that is not an array/object is unusable.
    $decode = function ($json) {
        $decoded = is_string($json) ? json_decode($json) : null;

        return (is_array($decoded) || is_object($decoded)) ? $decoded : null;
    };

    echo 'Fetching checksums.kubik-rubik.de' . PHP_EOL;
    $index = file_get_contents($base);
    if ($index === false) {
        echo 'Unable to fetch: ' . $base . PHP_EOL;
        return;
    }

    // excluded: contao
    foreach (['drupal', 'joomla', 'pagekit', 'typo3', 'wordpress'] as $type) {
        $pattern = '/<a class="btn btn-success" href="(' . preg_quote($type, '/') . '\/.*?)">\s+JSON.*?<\/a>/is';
        if (!preg_match_all($pattern, $index, $m)) {
            continue;
        }

        foreach ($m[1] as $url) {
            $file = str_replace(['/', '.'], '_', $url); // fix file name
            $cache_name = $file . '.json';

            $hashes = is_cached($cache_name) ? $decode(get_cache($cache_name)) : null;
            if ($hashes === null) {
                // No usable cached copy: download, and cache only valid JSON.
                echo 'Downloading: ' . $base . '/' . $url . PHP_EOL;
                $json = file_get_contents($base . '/' . $url);
                $hashes = $decode($json);
                if ($hashes === null) {
                    echo 'Skipping unreadable or invalid json file: ' . $file . PHP_EOL;
                    continue;
                }
                set_cache($cache_name, $json);
            }

            // write hash values to one file
            echo 'Parsing json file: ' . $file . PHP_EOL;
            foreach ($hashes as $hash) {
                // Nested structures cannot be written as a single line; skip them.
                if (is_scalar($hash)) {
                    fputs($fp, $hash . "\n");
                }
            }
        }
    }
}
/**
 * Collect the jQuery files linked from code.jquery.com/jquery/ and append the
 * MD5 hash of each file's content to an open stream, one hash per line.
 *
 * Files are stored through set_cache() after their first successful download
 * and read back through get_cache() afterwards. A failed or empty download is
 * reported and skipped: it is neither cached nor hashed, so the hash of an
 * empty string never ends up in the output. An empty cached copy is treated
 * as unusable and downloaded again.
 *
 * @param resource $fp Writable stream that receives the MD5 hashes.
 * @return void
 */
function fetch_jquery($fp)
{
    $base = 'https://code.jquery.com/';

    echo 'Fetching jQuery' . PHP_EOL;
    $index = file_get_contents($base . 'jquery/');
    if ($index === false) {
        echo 'Unable to fetch: ' . $base . 'jquery/' . PHP_EOL;
        return;
    }

    if (!preg_match_all('/<a class=\'open\-sri\-modal\' href=\'\/(jquery-.*?\.js)/', $index, $m)) {
        return;
    }

    foreach ($m[1] as $file) {
        // The name comes from remote HTML and is used as a cache path;
        // accept plain file names only.
        if (basename($file) !== $file) {
            echo 'Skipping unexpected file name: ' . $file . PHP_EOL;
            continue;
        }

        $source = is_cached($file) ? get_cache($file) : false;
        if (!is_string($source) || $source === '') {
            echo 'Downloading: ' . $base . $file . PHP_EOL;
            $source = file_get_contents($base . $file);
            if (!is_string($source) || $source === '') {
                echo 'Skipping failed download: ' . $file . PHP_EOL;
                continue;
            }
            set_cache($file, $source);
        }

        fputs($fp, md5($source) . "\n");
    }
}
// Report a fatal problem and stop with a non-zero exit status so that a
// calling shell script can detect the failure.
$fail = function ($message) {
    echo $message . PHP_EOL;
    exit(1);
};

// Make sure the cache directory exists. The trailing is_dir() check tolerates
// the directory being created by someone else between the test and mkdir().
if (!is_dir($cache_dir) && !mkdir($cache_dir) && !is_dir($cache_dir)) {
    $fail('Unable to create cache directory');
}

// Gather every hash into all.txt (created in the current working directory).
$fp = fopen('all.txt', 'w');
if ($fp === false) {
    $fail('Unable to open all.txt for writing');
}
fetch_kubik_rubik($fp);
fetch_jquery($fp);
fclose($fp);

// Sort and de-duplicate the collected hashes into unique.txt.
echo 'Creating unique database' . PHP_EOL;
exec('sort -u -o unique.txt all.txt', $output, $status);
if ($status !== 0) {
    $fail('sort failed with exit status ' . $status);
}

// Compress the de-duplicated list.
echo 'Compressing unique.txt' . PHP_EOL;
exec('gzip < unique.txt > compressed.dat', $output, $status); // gzencode
if ($status !== 0) {
    $fail('gzip failed with exit status ' . $status);
}

// Publish the SHA-256 of the compressed database next to it.
$hash = hash_file('sha256', 'compressed.dat');
if ($hash === false) {
    $fail('Unable to hash compressed.dat');
}
if (file_put_contents('compressed.sha256', $hash) === false) {
    $fail('Unable to write compressed.sha256');
}
echo 'SHA256 is ' . $hash . PHP_EOL;