diff --git a/tools/bigdata/generate.php b/tools/bigdata/generate.php index d9cd239..81386ce 100644 --- a/tools/bigdata/generate.php +++ b/tools/bigdata/generate.php @@ -1,41 +1,92 @@ \s+JSON.*?<\/a>/is', $data, $m); + foreach ($m[1] as $url) { + $file = str_replace(['/', '.'], '_', $url); // fix file name + if (!is_cached($file . '.json')) { + echo 'Downloading: ' . 'https://checksums.kubik-rubik.de/' . $url . PHP_EOL; + $json = file_get_contents('https://checksums.kubik-rubik.de/' . $url); + set_cache($file . '.json', $json); + } else { + $json = get_cache($file . '.json'); + } + + // decode json, write hash values to one file + echo 'Parsing json file: ' . $file . PHP_EOL; + foreach (json_decode($json) as $k => $hash) { + fputs($fp, $hash . "\n"); + } + } } } -echo 'Fetching checksums.kubik-rubik.de' . PHP_EOL; -$data = file_get_contents('https://checksums.kubik-rubik.de'); +function fetch_jquery($fp) +{ + echo 'Fetching jQuery' . PHP_EOL; + $data = file_get_contents('https://code.jquery.com/jquery/'); + + preg_match_all('/\s+JSON.*?<\/a>/is', $data, $m); - foreach ($m[1] as $url) { - $file = str_replace(['/', '.'], '_', $url); // fix file name - if (!is_readable(__DIR__ . '/cache/' . $file . '.json')) { - echo 'Downloading: ' . 'https://checksums.kubik-rubik.de/' . $url . PHP_EOL; - $json = file_get_contents('https://checksums.kubik-rubik.de/' . $url); - file_put_contents(__DIR__ . '/cache/' . $file . '.json', $json); - } else { - $json = file_get_contents(__DIR__ . '/cache/' . $file . '.json'); - } - - // decode json, write hash values to one file - echo 'Parsing json file: ' . $file . PHP_EOL; - foreach (json_decode($json) as $k => $hash) { - fputs($fp, $hash . "\n"); - } - } -} +fetch_kubik_rubik($fp); +fetch_jquery($fp); fclose($fp); -echo 'Create unique database' . PHP_EOL; +echo 'Creating unique database' . PHP_EOL; exec('sort -u -o unique.txt all.txt'); echo 'Compressing all.txt' . PHP_EOL; -exec('gzip < unique.txt > compressed.dat'); // gzencode \ No newline at end of file +exec('gzip < unique.txt > compressed.dat'); // gzencode + +$hash = hash_file('sha256', 'compressed.dat'); +file_put_contents('compressed.sha256', $hash); +echo 'SHA256 is ' . $hash . PHP_EOL; \ No newline at end of file diff --git a/tools/jquery.py b/tools/jquery.py deleted file mode 100644 index 2b00bd5..0000000 --- a/tools/jquery.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python - -import urllib2 -import re -import hashlib - -def fetch(url): - response = urllib2.urlopen(url) - return response.read() - -def main(): - html = fetch('https://code.jquery.com/jquery/') - regex = re.compile(r"