isFile()) { // store md5 hash we use that in the scanner fputs($f, md5(file_get_contents($splFileInfo->getPathname())) . "\n"); } } fclose($f); } fputs($fp, file_get_contents($hash_file)); } /** Appends one md5 line per jQuery release file to $fp. Reads the release listing, then for each matched file either downloads it when is_cached($file) is falsy, checks the base64 SHA-256 held in $m[2] against the download and stores it via set_cache, or loads it via get_cache. NOTE(review): the listing pattern and the head of the foreach are truncated as the file stands, so the capture groups cannot be confirmed from here. NOTE(review): PHP_EOL is appended to the body before it is hashed - presumably to match how the published hash was computed; confirm. @param resource $fp Handle that receives the md5 lines. */ function fetch_jquery($fp) { echo 'Fetching jQuery' . PHP_EOL; $data = fetch('https://code.jquery.com/jquery/'); preg_match_all( '/ $file) { if (!is_cached($file)) { echo 'Downloading: ' . 'https://code.jquery.com/' . $file . PHP_EOL; $data = fetch('https://code.jquery.com/' . $file) . PHP_EOL; if (base64_encode(hash('sha256', $data, true)) != $m[2][$k]) { die('Hash mismatch' . PHP_EOL); } set_cache($file, $data, 'sha256', bin2hex(base64_decode($m[2][$k]))); } else { $data = get_cache($file); } // store md5 hash we use that in the scanner fputs($fp, md5($data) . "\n"); } } /** Downloads an archive and stores it via set_cache unless is_cached($file) is already truthy. fetch is called with $tmp as second argument, presumably a destination path. When $hash_url is non-empty the expected hash is fetched from it and replaces $hash. The script dies on a hash mismatch. NOTE(review): __DIR__ has no trailing slash, so $tmp resolves to a sibling path named <dir>dl.tar.gz rather than a file inside the script directory; confirm whether a slash is missing. NOTE(review): the fetched hash is compared untrimmed with a loose != and $tmp is never removed. @param string $file Cache key for the archive. @param string $url Archive location passed to fetch. @param string|null $hash Expected digest in the hex form returned by hash_file. @param string $algo Algorithm name passed to hash_file. @param string|null $hash_url Optional location of a file holding the expected hash. */ function fetch_archive($file, $url, $hash, $algo, $hash_url = null) { $tmp = __DIR__ . 'dl.tar.gz'; if (!is_cached($file)) { echo 'Downloading: ' . $url . PHP_EOL; fetch($url, $tmp); if (!empty($hash_url)) { echo 'Downloading hash: ' . $hash_url . PHP_EOL; $hash = fetch($hash_url); } $data_hash = hash_file($algo, $tmp); if ($data_hash != $hash) { die('Hash mismatch: ' . $data_hash . ' != ' . $hash . PHP_EOL); } set_cache($file, file_get_contents($tmp), $algo, $hash); } } // Ignored releases are: beta, RC, strayhorn, mingus, delta, gold and mu by regexp and 1.0.2 because no sha1 /** Walks the WordPress release listing and, for each match, calls fetch_archive with the file at <url>.sha1 as hash source, then hash_archive. $m[1] is used as the download URL and $m[2] as the archive file name. NOTE(review): the listing pattern is empty as the file stands (its markup appears to have been stripped), so the release filters cannot be verified here. @param resource $fp Handle passed on to hash_archive. */ function fetch_wordpress($fp) { echo 'Fetching Wordpress' . PHP_EOL; $data = fetch('https://wordpress.org/download/releases/'); preg_match_all( '//', $data, $m ); foreach ($m[2] as $k => $file) { if ($m[2][$k] == 'wordpress-1.0.2.tar.gz') { // no sha1 info continue; } fetch_archive($m[2][$k], $m[1][$k], null, 'sha1', $m[1][$k] . '.sha1'); hash_archive($fp, $file); } } // Ignores: snapshots, rc, beta, alpha /** Iterates the decoded TYPO3 release JSON, skips versions containing snapshot, rc, beta or alpha plus three hard-coded versions, and for the rest calls fetch_archive with the sha1 from the feed followed by hash_archive. NOTE(review): the cache file prefix is spelled type3-, presumably a typo for typo3-; renaming it would orphan existing cache entries. @param resource $fp Handle passed on to hash_archive. */ function fetch_typo3($fp) { echo 'Fetching Typo3' . 
PHP_EOL; $data = json_decode(fetch('https://get.typo3.org/json')); foreach ($data as $value) { if (isset($value->releases)) { foreach ($value->releases as $release) { if (strstr($release->version, 'snapshot') || strstr($release->version, 'rc') || strstr($release->version, 'beta') || strstr($release->version, 'alpha')) { // ignoring snapshots continue; } if (in_array($release->version, ['4.6.0', '4.5.33', '3.3.0'])) { // The specified blob does not exist. // 3.3.0 is damaged archive continue; } $file = 'type3-' . $release->version . '.tar.gz'; fetch_archive($file, 'https://get.typo3.org' . $release->url->tar, $release->checksums->tar->sha1, 'sha1'); hash_archive($fp, $file); } } } } /** Reads the Pagekit update feed and, for every release entry, calls fetch_archive with the sha1 from the shasum field followed by hash_archive. The value under the latest key is wrapped in an array so it can be iterated like the other entries. @param resource $fp Handle passed on to hash_archive. */ function fetch_pagekit($fp) { echo 'Fetching Pagekit' . PHP_EOL; $data = json_decode(fetch('https://pagekit.com/api/update')); foreach ($data as $k => $releases) { if ($k == 'latest') { $releases = [$releases]; } foreach ($releases as $release) { $file = 'pagekit-' . $release->version . '.tar.gz'; fetch_archive($file, $release->url, $release->shasum, 'sha1'); hash_archive($fp, $file); } } } // Ignored releases are: alpha, beta, rc, dev /** Pages through the Drupal release listing. For each matched release it fetches the release page, extracts a hash with a second pattern (dying when none is found), then calls fetch_archive with md5 and hash_archive. The page count is taken once from the last-page link; when that link is never matched only page 0 is processed. NOTE(review): both release patterns look truncated as the file stands (markup stripped), and the last-page link text contains mis-encoded characters, so matching cannot be verified here. @param resource $fp Handle passed on to hash_archive. */ function fetch_drupal($fp) { echo 'Fetching Drupal ' . PHP_EOL; $page = 0; $pages = false; do { $data = fetch('https://www.drupal.org/project/drupal/releases?page=' . $page); // pagination init if ($pages === false && preg_match('/\?page=(\d+)">last ยป<\/a>/', $data, $m)) { $pages = $m[1]; } preg_match_all( '/drupal/i', $data, $m ); foreach ($m[1] as $k => $ver_uri) { $ver_data = fetch('https://www.drupal.org' . $ver_uri); if (!preg_match('/([a-z0-9]+)<\/span>/i', $ver_data, $ver_m)) { die('Missing hash info: ' . $m[2][$k]); } $file = 'drupal-' . $m[2][$k] . '.tar.gz'; fetch_archive($file, 'https://ftp.drupal.org/files/projects/' . 
$file, $ver_m[1], 'md5'); hash_archive($fp, $file); } if ($pages === false) { break; } $page++; } while ($page <= $pages); } /** For each entry of $versions (label mapped to the numeric id appended to the downloads URL) lists the release pages and, for releases where is_cached($file) is falsy, scrapes the SHA1 signature and the gz archive link before calling fetch_archive; hash_archive then runs for each release reached. NOTE(review): a failed scrape uses break, which abandons the remaining releases of that version line instead of skipping one; confirm this is intended. NOTE(review): the inner fetch overwrites $data, and the signature pattern appears to have lost markup as the file stands. @param resource $fp Handle passed on to hash_archive. @param array $versions Map of version label to download id. */ function fetch_joomla($fp, $versions) { foreach ($versions as $version => $id) { echo 'Fetching Joomla ' . $version . PHP_EOL; $data = fetch('https://downloads.joomla.org/cms/joomla' . $id); preg_match_all('/href="(\/cms\/joomla\d+\/(\d+\-\d+\-\d+))"/', $data, $m); foreach ($m[1] as $k => $url) { $file = 'joomla_' . $m[2][$k] . '-stable-full_package.tar.gz'; // pre check because we need hash information if (!is_cached($file)) { $data = fetch('https://downloads.joomla.org' . $url); if (!preg_match('/Joomla! '.str_replace('-', '\.', $m[2][$k]).' Full Package \(\.tar\.gz\).*?SHA1 Signature\s*<\/dt>\s*
\s*([a-z0-9]{40})\s*<\/dd>/is', $data, $m2)) { echo 'Unable to find SHA1 signature for version ' . $m[2][$k] . PHP_EOL; break; } if (!preg_match('/href="('.preg_quote($url, '/').'\/.*?format=gz)"/', $data, $m3)) { echo 'Unable to find archive url for version ' . $m[2][$k] . PHP_EOL; break; } fetch_archive($file, 'https://downloads.joomla.org' . $m3[1], $m2[1], 'sha1'); } hash_archive($fp, $file); } } } /* Driver: the cache directory is the single CLI argument or, by default, the cache folder beside this script. Every fetcher appends md5 lines to all.txt, which is then de-duplicated with sort, compressed with gzip, and fingerprinted with SHA-256. NOTE(review): the results of fopen, exec and file_put_contents are not checked, so a failure in any of those steps passes silently. */ if ($argc == 2) { $cache_dir = $argv[1]; } else { $cache_dir = __DIR__ . '/cache'; } if (!is_readable($cache_dir)) { if (!mkdir($cache_dir)) { die('Unable to create cache directory'); } } $fp = fopen('all.txt', 'w'); // TODO https://modx.com/download/other-downloads // TODO wordpress plugins only popular ones fetch_jquery($fp); fetch_wordpress($fp); fetch_typo3($fp); fetch_pagekit($fp); fetch_drupal($fp); fetch_joomla($fp, ['3.0' => 3, '2.5' => 25, '1.5' => 15, '1.0' => 10]); fclose($fp); echo 'Creating unique database' . PHP_EOL; exec('sort -u -o unique.txt all.txt'); echo 'Compressing all.txt' . PHP_EOL; exec('gzip < unique.txt > compressed.dat'); // gzencode $hash = hash_file('sha256', 'compressed.dat'); file_put_contents('compressed.sha256', $hash); echo 'SHA256 is ' . $hash . PHP_EOL;