diff --git a/tools/bigdata/generate.php b/tools/bigdata/generate.php index 510455a..e6287fb 100644 --- a/tools/bigdata/generate.php +++ b/tools/bigdata/generate.php @@ -15,8 +15,8 @@ function fetch($url, $file = false) $headers = array( // drupal suxx - 'Cookie: pxvid=44e1b040-4dde-11e8-b1dc-f15e898556c7; _ga=GA1.2.2042202377.1525247839; _gat=1; _gid=GA1.2.1601332121.1550831838; _px2=eyJ1IjoiZDM3OTk1MDAtMzY4ZC0xMWU5LWI3MDItYTdlMDI1ZWZhZmI2IiwidiI6IjQ0ZTFiMDQwLTRkZGUtMTFlOC1iMWRjLWYxNWU4OTg1NTZjNyIsInQiOjE1NTA4MzIxMzc5MjcsImgiOiJjMjBhNTQzNGIxYWQwNWFiOWUzNTI2OWRjNTM1MjgzNjkxNzg5OTIxNGM4YmIzZDBkZTg5ZTIxMzY0NTc5Zjk3In0=; has_js=1; _pxvid=44e1b040-4dde-11e8-b1dc-f15e898556c7', - 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15', + 'Cookie: _px2=eyJ1IjoiZDZhNGM3MjAtYjZmNC0xMWVhLWI2MzMtNzk5YzRmZjM4ZmJkIiwidiI6IjQ0ZTFiMDQwLTRkZGUtMTFlOC1iMWRjLWYxNWU4OTg1NTZjNyIsInQiOjE1OTMwOTc2Mjg2NzAsImgiOiIzNzk5N2RkYTU3ZTI1NGY0ZDM5MmRiMWExNWZhZjhjNTZkMmM5NTZkZDJiZWVkZGVlZDc1MThiNTE5MTFjYzgwIn0=; _ga=GA1.2.2042202377.1525247839; _gat=1; _gid=GA1.2.1034461360.1593095881; has_js=1; _pxff_fp=1; _pxff_rf=1; pxvid=44e1b040-4dde-11e8-b1dc-f15e898556c7', + 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15', ); curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); @@ -161,7 +161,7 @@ function fetch_typo3($fp) continue; } $file = 'type3-' . $release->version . '.tar.gz'; - fetch_archive($file, $release->url->tar, $release->checksums->tar->sha1, 'sha1'); + fetch_archive($file, 'https://get.typo3.org' . $release->url->tar, $release->checksums->tar->sha1, 'sha1'); hash_archive($fp, $file); } } @@ -185,38 +185,40 @@ function fetch_pagekit($fp) } // Ignored releases are: alpha, beta, rc, dev -function fetch_drupal($fp, $versions) +function fetch_drupal($fp) { - foreach ($versions as $version => $id) { - echo 'Fetching Drupal ' . $version . PHP_EOL; + echo 'Fetching Drupal ' . PHP_EOL; - $page = 0; - $pages = false; - do { - $data = fetch('https://www.drupal.org/project/drupal/releases?api_version%5B%5D=' . $id . '&page=' .$page); + $page = 0; + $pages = false; + do { + $data = fetch('https://www.drupal.org/project/drupal/releases?page=' . $page); - // pagination init - if ($pages === false && preg_match('/&page=(\d+)">last »<\/a>/', $data, $m)) { - $pages = $m[1]; + // pagination init + if ($pages === false && preg_match('/\?page=(\d+)">last »<\/a>/', $data, $m)) { + $pages = $m[1]; + } + + preg_match_all( + '/drupal/i', + $data, + $m + ); + foreach ($m[1] as $k => $ver_uri) { + $ver_data = fetch('https://www.drupal.org' . $ver_uri); + if (!preg_match('/([a-z0-9]+)<\/span>/i', $ver_data, $ver_m)) { + die('Missing hash info: ' . $m[2][$k]); } + $file = 'drupal-' . $m[2][$k] . '.tar.gz'; + fetch_archive($file, 'https://ftp.drupal.org/files/projects/' . $file, $ver_m[1], 'md5'); + hash_archive($fp, $file); + } - preg_match_all( - '/data-th="Download">(.*?)\s*([a-z0-9]{32})\s*<\/td>/is', - $data, - $m - ); - foreach ($m[3] as $k => $file) { - fetch_archive($file, $m[2][$k], $m[5][$k], 'md5'); - hash_archive($fp, $file); - } - - if ($pages === false) { - break; - } - - $page++; - }while($page <= $pages); - } + if ($pages === false) { + break; + } + $page++; + } while ($page <= $pages); } function fetch_joomla($fp, $versions) @@ -272,24 +274,7 @@ fetch_jquery($fp); fetch_wordpress($fp); fetch_typo3($fp); fetch_pagekit($fp); -fetch_drupal( - $fp, - [ - '9.x' => 39794, - '8.x' => 7234, - '7.x' => 103, - '6.x' => 87, - '5.x' => 78, - '4.7.x' => 79, - '4.6.x' => 80, - '4.5.x' => 81, - '4.4.x' => 82, - '4.3.x' => 83, - '4.2.x' => 84, - '4.1.x' => 85, - '4.0.x' => 86 - ] -); +fetch_drupal($fp); fetch_joomla($fp, ['3.0' => 3, '2.5' => 25, '1.5' => 15, '1.0' => 10]); fclose($fp);