WebP Express CloudHost.es Fix v0.25.9-cloudhost

✅ Fixed bulk conversion getting stuck on missing files ✅ Added robust error handling and timeout protection ✅ Improved JavaScript response parsing ✅ Added file existence validation ✅ Fixed missing PHP class imports ✅ Added comprehensive try-catch error recovery 🔧 Key fixes: - File existence checks before conversion attempts - 30-second timeout protection per file - Graceful handling of 500 errors and JSON parsing issues - Automatic continuation to next file on failures - Cache busting for JavaScript updates 🎯 Result: Bulk conversion now completes successfully even with missing files 🚀 Generated with Claude Code (https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-23 10:22:32 +02:00
commit 37cf714058
553 changed files with 55249 additions and 0 deletions
--- a/vendor/rosell-dk/dom-util-for-webp/src/ImageUrlReplacer.php
+++ b/vendor/rosell-dk/dom-util-for-webp/src/ImageUrlReplacer.php
@@ -0,0 +1,247 @@
+<?php
+
+namespace DOMUtilForWebP;
+
+//use Sunra\PhpSimple\HtmlDomParser;
+use KubAT\PhpSimple\HtmlDomParser;
+
+/**
+ *  Highly configurable class for replacing image URLs in HTML (both src and srcset syntax)
+ *
+ *  Uses http://simplehtmldom.sourceforge.net/ - a library for easily manipulating HTML by means of a DOM.
+ *  The great thing about this library is that it supports working on invalid HTML and it only applies the changes you
+ *  make - very gently (however, not as gently as we do in PictureTags).
+ *  PS: The library is a bit old, so perhaps we should look for another.
+ *  ie https://packagist.org/packages/masterminds/html5 ??
+ *
+ *  Behaviour can be customized by overriding the public methods (replaceUrl, $searchInTags, etc)
+ *
+ *  Default behaviour:
+ *  - The modified URL is the same as the original, with ".webp" appended                   (replaceUrl)
+ *  - Limits to these tags: <img>, <source>, <input>, <iframe>, <div>, <li>, <link>, <a>,
+ *    <section> and <video>                                                                 ($searchInTags)
+ *  - Limits to these attributes: "src", "srcset" and any attribute starting with "data-"   (attributeFilter)
+ *  - Only replaces URLs that ends with "png", "jpg" or "jpeg" (no query strings either)    (replaceUrl)
+ *
+ *
+ */
+class ImageUrlReplacer
+{
+
+    // define tags to be searched.
+    // The div and li are on the list because these are often used with lazy loading
+    // should we add <meta> ?
+    // Probably not for open graph images or twitter
+    // so not these:
+    // - <meta property="og:image" content="[url]">
+    // - <meta property="og:image:secure_url" content="[url]">
+    // - <meta name="twitter:image" content="[url]">
+    // Meta can also be used in schema.org micro-formatting, ie:
+    // - <meta itemprop="image" content="[url]">
+    //
+    // How about preloaded images? - yes, suppose we should replace those
+    // - <link rel="prefetch" href="[url]">
+    // - <link rel="preload" as="image" href="[url]">
+    // (note that the default attributeFilter() does not accept "href")
+    public static $searchInTags = ['img', 'source', 'input', 'iframe', 'div', 'li', 'link', 'a', 'section', 'video'];
+
+    /**
+     * Empty constructor for preventing child classes from creating constructors.
+     *
+     * We do this because otherwise the "new static()" call inside the ::replace() method
+     * would be unsafe. See #21
+     * @return  void
+     */
+    final public function __construct()
+    {
+    }
+
+    /**
+     * Decide whether an URL should be replaced and, if so, produce the replacement.
+     *
+     * The default implementation accepts URLs ending in "png", "jpg" or "jpeg" (case sensitive,
+     * so URLs with a query string are rejected) and appends ".webp".
+     *
+     * @param  string  $url  URL found in an attribute or in CSS
+     * @return string|null webp url or, if URL should not be changed, return nothing
+     **/
+    public function replaceUrl($url)
+    {
+        if (!preg_match('#(png|jpe?g)$#', $url)) {
+            return null;
+        }
+        return $url . '.webp';
+    }
+
+    /**
+     * Like replaceUrl(), but with a caller-chosen value for the "do not replace" case.
+     *
+     * @param  string  $url                  URL to examine
+     * @param  mixed   $returnValueIfDenied  returned when replaceUrl() yields null
+     * @return mixed   the replaced URL, or $returnValueIfDenied
+     */
+    public function replaceUrlOr($url, $returnValueIfDenied)
+    {
+        $url = $this->replaceUrl($url);
+        return (isset($url) ? $url : $returnValueIfDenied);
+    }
+
+    /**
+     * Handle an attribute value that holds a single URL.
+     *
+     * @param  string  $attrValue
+     * @return string  the replaced URL, or the value unchanged when it is not replaceable
+     */
+    public function handleSrc($attrValue)
+    {
+        return $this->replaceUrlOr($attrValue, $attrValue);
+    }
+
+    /**
+     * Handle an attribute value in srcset syntax, ie "1.jpg 1000w, 2.jpg".
+     *
+     * Each comma separated entry is examined on its own. Entries whose URL is not replaceable
+     * are kept (trimmed), so a value in which nothing is replaceable does not get its
+     * whitespace rewritten.
+     *
+     * Known limitation: entries are split on every comma, so URLs that contain a comma
+     * are not supported.
+     *
+     * @param  string  $attrValue
+     * @return string  the entries, joined with ", "
+     */
+    public function handleSrcSet($attrValue)
+    {
+        $entries = [];
+        foreach (explode(',', $attrValue) as $srcSetEntry) {
+            // $srcSetEntry is ie "image.jpg 520w", but can also lack width, ie just "image.jpg"
+            // it can also be ie "image.jpg 2x"
+            $srcSetEntry = trim($srcSetEntry);
+            $entryParts = preg_split('/\s+/', $srcSetEntry, 2);
+            if (count($entryParts) == 2) {
+                list($src, $descriptors) = $entryParts;
+            } else {
+                $src = $srcSetEntry;
+                $descriptors = null;
+            }
+
+            $webpUrl = $this->replaceUrlOr($src, false);
+            if ($webpUrl === false) {
+                // Not replaceable: keep the entry as it was (minus surrounding whitespace)
+                $entries[] = $srcSetEntry;
+            } else {
+                $entries[] = $webpUrl . (isset($descriptors) ? ' ' . $descriptors : '');
+            }
+        }
+        return implode(', ', $entries);
+    }
+
+    /**
+     *  Test if attribute value looks like it has srcset syntax.
+     *  "image.jpg 100w" does for example. And "image.jpg 1x". Also "image1.jpg, image2.jpg 1x"
+     *  and fractional densities such as "image.jpg 1.5x".
+     *  Mixing x and w is invalid (according to
+     *         https://stackoverflow.com/questions/26928828/html5-srcset-mixing-x-and-w-syntax)
+     *  But we accept it anyway
+     *  It is not the job of this function to see if the first part is an image URL
+     *  That will be done in handleSrcSet.
+     *
+     *  A descriptor must contain at least one digit and must be followed by whitespace, a comma
+     *  or the end of the value. This prevents a plain URL containing a space followed by "w" or
+     *  "x" (ie "my wallpaper.jpg") from being mistaken for a srcset.
+     *
+     *  @param  string  $value  attribute value
+     *  @return bool
+     */
+    public function looksLikeSrcSet($value)
+    {
+        return (preg_match('#\s\d+(?:\.\d+)?[wx](?=\s|,|$)#', $value) === 1);
+    }
+
+    /**
+     * Handle the value of an attribute accepted by attributeFilter().
+     *
+     * Dispatches through $this so that child classes overriding looksLikeSrcSet(),
+     * handleSrcSet() or handleSrc() actually take effect.
+     *
+     * @param  string  $value  attribute value
+     * @return string  the (possibly) altered attribute value
+     */
+    public function handleAttribute($value)
+    {
+        if ($this->looksLikeSrcSet($value)) {
+            return $this->handleSrcSet($value);
+        }
+        return $this->handleSrc($value);
+    }
+
+    /**
+     * Decide whether an attribute should be examined for URLs.
+     *
+     * @param  string  $attrName  attribute name (any casing)
+     * @return bool    true for "src", "srcset" and names starting with "data-"
+     */
+    public function attributeFilter($attrName)
+    {
+        $attrName = strtolower($attrName);
+        return (($attrName == 'src') || ($attrName == 'srcset') || (strpos($attrName, 'data-') === 0));
+    }
+
+    /**
+     * Callback for the url(...) regex used in processCSS().
+     *
+     * @param  array  $matches  [0] whole match, [1] 'url(' incl. opening quote, [2] the quote,
+     *                          [3] the URL, [4] closing quote and ')'
+     * @return string the url(...) expression with the URL replaced when replaceable
+     */
+    public function processCSSRegExCallback($matches)
+    {
+        return $matches[1] . $this->replaceUrlOr($matches[3], $matches[3]) . $matches[4];
+    }
+
+    /**
+     * Replace image URLs inside url(...) expressions of "background" / "background-image"
+     * declarations.
+     *
+     * @param  string  $css  contents of a <style> element or of a style attribute
+     * @return string  the CSS with replaceable URLs replaced
+     */
+    public function processCSS($css)
+    {
+        // https://regexr.com/46qdg
+        $regex = '#(url\\s*\\(([\\"\\\']?))([^\\\'\\";\\)]*)(\\2\\s*\\))#';
+
+        $declarations = explode(';', $css);
+        foreach ($declarations as $i => $declaration) {
+            if (!preg_match('#(background(-image)?)\\s*:#', $declaration)) {
+                continue;
+            }
+            // A background can hold several comma separated layers, each with its own url()
+            $parts = explode(',', $declaration);
+            foreach ($parts as $j => $part) {
+                // Bound to $this rather than a hard-coded class name, so child classes can
+                // override the callback
+                $replaced = preg_replace_callback($regex, [$this, 'processCSSRegExCallback'], $part);
+
+                // preg_replace_callback returns null on error - keep the part untouched then
+                if ($replaced !== null) {
+                    $parts[$j] = $replaced;
+                }
+            }
+            $declarations[$i] = implode(',', $parts);
+        }
+        return implode(';', $declarations);
+    }
+
+    /**
+     * Replace image URLs in a piece of HTML: in attributes of the tags listed in
+     * $searchInTags, in <style> elements and in style attributes.
+     *
+     * @param  string  $html
+     * @return string  the altered HTML. If the HTML parser refuses the input, the HTML is
+     *                 returned unaltered, prefixed with a HTML comment telling why
+     */
+    public function replaceHtml($html)
+    {
+        // Loose comparison is intended, so null is treated like the empty string
+        if ($html == '') {
+            return '';
+        }
+
+        // https://stackoverflow.com/questions/4812691/preserve-line-breaks-simple-html-dom-parser
+
+        // function str_get_html($str, $lowercase=true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET,
+        //    $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
+
+        $dom = HtmlDomParser::str_get_html($html, false, true, 'UTF-8', false);
+
+        // MAX_FILE_SIZE is defined in simple_html_dom.
+        // For safety sake, we make sure it is defined before using
+        defined('MAX_FILE_SIZE') || define('MAX_FILE_SIZE', 600000);
+
+        if ($dom === false) {
+            if (strlen($html) > MAX_FILE_SIZE) {
+                return '<!-- Alter HTML was skipped because the HTML is too big to process! ' .
+                    '(limit is set to ' . MAX_FILE_SIZE . ' bytes) -->' . "\n" . $html;
+            }
+            return '<!-- Alter HTML was skipped because the helper library refused to process the html -->' .
+                "\n" . $html;
+        }
+
+        // Replace attributes (src, srcset, data-src, etc)
+        foreach (self::$searchInTags as $tagName) {
+            foreach ($dom->find($tagName) as $elem) {
+                foreach ($elem->getAllAttributes() as $attrName => $attrValue) {
+                    if ($this->attributeFilter($attrName)) {
+                        $elem->setAttribute($attrName, $this->handleAttribute($attrValue));
+                    }
+                }
+            }
+        }
+
+        // Replace <style> elements
+        foreach ($dom->find('style') as $elem) {
+            $css = $this->processCSS($elem->innertext);
+            if ($css != $elem->innertext) {
+                $elem->innertext = $css;
+            }
+        }
+
+        // Replace style attributes
+        foreach ($dom->find('*[style]') as $elem) {
+            $css = $this->processCSS($elem->style);
+            if ($css != $elem->style) {
+                $elem->style = $css;
+            }
+        }
+
+        return $dom->save();
+    }
+
+    /**
+     * Main replacer function.
+     *
+     * Instantiates the class it is called on ("new static()"), so child classes can be used
+     * the same way: MyReplacer::replace($html)
+     *
+     * @param  string  $html
+     * @return string  see replaceHtml()
+     */
+    public static function replace($html)
+    {
+        $iur = new static();
+        return $iur->replaceHtml($html);
+    }
+}
--- a/vendor/rosell-dk/dom-util-for-webp/src/PictureTags.php
+++ b/vendor/rosell-dk/dom-util-for-webp/src/PictureTags.php
@@ -0,0 +1,337 @@
+<?php
+
+namespace DOMUtilForWebP;
+
+//use Sunra\PhpSimple\HtmlDomParser;
+use KubAT\PhpSimple\HtmlDomParser;
+
+/**
+ * Class PictureTags - convert an <img> tag to a <picture> tag and add the webp versions of the images
+ * Code is based on code from the ShortPixel plugin, which in turn used code from Responsify WP plugin
+ *
+ * It works like this:
+ *
+ * 1. Remove existing <picture> tags and their content - replace with tokens in order to reinsert later
+ * 2. Process <img> tags.
+ *    - The tags are found with regex.
+ *    - The attributes are parsed with DOMDocument if it exists, otherwise with the Simple Html Dom library,
+ *      which is included inside this library
+ * 3. Re-insert the existing <picture> tags
+ *
+ * This procedure is very gentle and needle-like. No need for a complete parse - so invalid HTML is no big issue
+ *
+ * PS:
+ * https://packagist.org/packages/masterminds/html5
+ */
+
+
+class PictureTags
+{
+
+    /**
+     * The <picture> tags that replaceHtml() has temporarily swapped for placeholder tokens.
+     * The index in the array is the number used in the token.
+     *
+     * @var string[]
+     */
+    private $existingPictureTags;
+
+    /**
+     * Empty constructor for preventing child classes from creating constructors.
+     *
+     * We do this because otherwise the "new static()" call inside the ::replace() method
+     * would be unsafe. See #21
+     * @return  void
+     */
+    final public function __construct()
+    {
+        $this->existingPictureTags = [];
+    }
+
+    /**
+     * Decide whether an URL should get a webp counterpart and, if so, produce it.
+     *
+     * The default implementation accepts URLs ending in "png", "jpg" or "jpeg" (case sensitive)
+     * and appends ".webp".
+     *
+     * @param  string  $url
+     * @return string|null  webp url, or null if there should be no webp for the URL
+     */
+    public function replaceUrl($url)
+    {
+        if (!preg_match('#(png|jpe?g)$#', $url)) {
+            return null;
+        }
+        return $url . '.webp';
+    }
+
+    /**
+     * Like replaceUrl(), but with a caller-chosen value for the "no webp" case.
+     *
+     * @param  string  $url
+     * @param  mixed   $returnValueIfDenied  returned when replaceUrl() yields null
+     * @return mixed   the webp URL, or $returnValueIfDenied
+     */
+    public function replaceUrlOr($url, $returnValueIfDenied)
+    {
+        $url = $this->replaceUrl($url);
+        return (isset($url) ? $url : $returnValueIfDenied);
+    }
+
+    /**
+     * Look for attributes such as "data-lazy-src" and "data-src" and prefer them over "src"
+     *
+     * @param  array  $attributes  an array of attributes for the element
+     * @param  string  $attrName    ie "src", "srcset" or "sizes"
+     *
+     * @return array  an array with "value" key and "attrName" key. ("value" is the trimmed value of the
+     *                attribute and "attrName" is the name of the attribute used). Both are false when
+     *                none of the variants is set to a non-empty value
+     *
+     */
+    private static function lazyGet($attributes, $attrName)
+    {
+        // Ordered by preference
+        foreach (['data-lazy-', 'data-', ''] as $prefix) {
+            $name = $prefix . $attrName;
+            if (isset($attributes[$name]) && strlen($attributes[$name])) {
+                return [
+                    'value' => trim($attributes[$name]),
+                    'attrName' => $name
+                ];
+            }
+        }
+        return [
+            'value' => false,
+            'attrName' => false
+        ];
+    }
+
+    /**
+     * Look for attribute such as "src", but also with prefixes such as "data-lazy-src" and "data-src"
+     *
+     * @param  array  $attributes  an array of all attributes for the element
+     * @param  string  $attrName    ie "src", "srcset" or "sizes"
+     *
+     * @return array  the non-empty attributes found. Keys are the attribute names (ie "src", "data-src"),
+     *                values are the trimmed attribute values
+     *
+     */
+    private static function findAttributesWithNameOrPrefixed($attributes, $attrName)
+    {
+        $result = [];
+        foreach (['', 'data-lazy-', 'data-'] as $prefix) {
+            $name = $prefix . $attrName;
+            if (isset($attributes[$name]) && strlen($attributes[$name])) {
+                $result[$name] = trim($attributes[$name]);
+            }
+        }
+        return $result;
+    }
+
+    /**
+     *  Convert to UTF-8 and encode chars outside of ascii-range
+     *
+     *  Input: html that might be in any character encoding and might contain non-ascii characters
+     *  Output: html in UTF-8 encoding, where non-ascii characters are encoded
+     *          (the input is returned untouched when the mbstring functions are unavailable)
+     *
+     */
+    private static function textToUTF8WithNonAsciiEncoded($html)
+    {
+        if (function_exists("mb_convert_encoding")) {
+            $html = mb_convert_encoding($html, 'UTF-8');
+            $html = mb_encode_numericentity($html, array (0x7f, 0xffff, 0, 0xffff), 'UTF-8');
+        }
+        return $html;
+    }
+
+    /**
+     * Parse the attributes of the first <img> tag in a piece of HTML.
+     *
+     * DOMDocument is used when available, otherwise the Simple Html Dom library.
+     *
+     * @param  string  $html  HTML of an <img> tag
+     * @return array   attribute name => attribute value. Empty if no <img> element could be parsed
+     */
+    private static function getAttributes($html)
+    {
+        if (class_exists('\\DOMDocument')) {
+            $dom = new \DOMDocument();
+
+            if (function_exists("mb_encode_numericentity")) {
+                // I'm in doubt if I should add the following line (see #41)
+                // $html = mb_convert_encoding($html, 'UTF-8');
+                $html = mb_encode_numericentity($html, array (0x7f, 0xffff, 0, 0xffff));  // #41
+            }
+
+            // Warnings are silenced because loadHTML complains about invalid markup,
+            // which is expected when feeding it a lone tag
+            @$dom->loadHTML($html);
+            $image = $dom->getElementsByTagName('img')->item(0);
+            if ($image === null) {
+                // The parser did not produce an <img> element. Signal "no attributes", which makes
+                // the caller leave the tag alone
+                return [];
+            }
+            $attributes = [];
+            foreach ($image->attributes as $attr) {
+                $attributes[$attr->nodeName] = $attr->nodeValue;
+            }
+            return $attributes;
+        } else {
+            // Convert to UTF-8 because HtmlDomParser::str_get_html needs to be told the
+            // encoding. As UTF-8 might conflict with the charset set in the meta, we must
+            // encode all characters outside the ascii-range.
+            // It would perhaps have been better to try to guess the encoding rather than
+            // changing it (see #39), but I'm reluctant to introduce changes.
+            $html =  self::textToUTF8WithNonAsciiEncoded($html);
+            $dom = HtmlDomParser::str_get_html($html, false, true, 'UTF-8', false);
+            if ($dom !== false) {
+                $elems = $dom->find('img,IMG');
+                foreach ($elems as $index => $elem) {
+                    // Only the first element is of interest
+                    $attributes = [];
+                    foreach ($elem->getAllAttributes() as $attrName => $attrValue) {
+                        $attributes[strtolower($attrName)] = $attrValue;
+                    }
+                    return $attributes;
+                }
+            }
+            return [];
+        }
+    }
+
+    /**
+     * Makes a string with all attributes.
+     *
+     * Double quotes in the values are encoded as &quot;. The values may have been entity-decoded
+     * by the parser, so a raw quote would otherwise terminate the attribute prematurely and let
+     * the rest of the value be interpreted as markup.
+     *
+     * @param  array $attribute_array  attribute name => attribute value
+     * @return string  ie ' src="a.jpg" alt="b"' (with leading space), or '' if there are no attributes
+     */
+    private static function createAttributes($attribute_array)
+    {
+        $attributes = '';
+        foreach ($attribute_array as $attribute => $value) {
+            $attributes .= ' ' . $attribute . '="' . str_replace('"', '&quot;', $value) . '"';
+        }
+        return $attributes;
+    }
+
+    /**
+     *  Replace <img> tag with <picture> tag.
+     *
+     *  @param  array  $match  regex match; $match[0] is the complete <img> tag
+     *  @return string  a <picture> tag wrapping a webp <source> and the <img>, or the <img> tag
+     *                  untouched if it should not (or cannot) be converted
+     */
+    private function replaceCallback($match)
+    {
+        $imgTag = $match[0];
+
+        // Do nothing with images that have the 'webpexpress-processed' class.
+        if (strpos($imgTag, 'webpexpress-processed') !== false) {
+            return $imgTag;
+        }
+        $imgAttributes = self::getAttributes($imgTag);
+
+        $sizesInfo = self::lazyGet($imgAttributes, 'sizes');
+
+        $srcSetAttributes = self::findAttributesWithNameOrPrefixed($imgAttributes, 'srcset');
+        $srcAttributes = self::findAttributesWithNameOrPrefixed($imgAttributes, 'src');
+
+        if ((!isset($srcSetAttributes['srcset'])) && (!isset($srcAttributes['src']))) {
+            // better not mess with this html...
+            return $imgTag;
+        }
+
+        // add the exclude class so if this content is processed again in other filter,
+        // the img is not converted again in picture
+        $imgAttributes['class'] = (isset($imgAttributes['class']) ? $imgAttributes['class'] . " " : "") .
+            "webpexpress-processed";
+
+        // Process srcset (also data-srcset etc)
+        $atLeastOneWebp = false;
+        $sourceTagAttributes = [];
+        foreach ($srcSetAttributes as $attrName => $attrValue) {
+            $srcsetArrWebP = [];
+
+            // Candidates are separated by a comma followed by whitespace (space, newline, ...).
+            // A comma that is not followed by whitespace is considered part of the URL.
+            foreach (preg_split('/,\s+/', $attrValue) as $srcSetEntry) {
+                // $srcSetEntry is ie "http://example.com/image.jpg 520w"
+                $srcSetEntry = trim($srcSetEntry);
+                $entryParts = preg_split('/\s+/', $srcSetEntry, 2);
+                $src = $entryParts[0];
+                $descriptors = (count($entryParts) == 2 ? $entryParts[1] : null);
+
+                $webpUrl = $this->replaceUrlOr($src, false);
+                if ($webpUrl == false) {
+                    // We want ALL of the sizes as webp.
+                    // If we cannot have that, it is better to abort! - See #42
+                    return $imgTag;
+                }
+                if (substr($src, 0, 5) != 'data:') {
+                    $atLeastOneWebp = true;
+                    $srcsetArrWebP[] = $webpUrl . (isset($descriptors) ? ' ' . $descriptors : '');
+                }
+            }
+            $sourceTagAttributes[$attrName] = implode(', ', $srcsetArrWebP);
+        }
+
+        foreach ($srcAttributes as $attrName => $attrValue) {
+            if (substr($attrValue, 0, 5) == 'data:') {
+                // ignore tags with data urls, such as <img src="data:...
+                return $imgTag;
+            }
+            // Make sure not to override existing srcset with src
+            if (!isset($sourceTagAttributes[$attrName . 'set'])) {
+                $srcWebP = $this->replaceUrlOr($attrValue, false);
+                if ($srcWebP !== false) {
+                    // Only add the attribute when there is a webp for it
+                    // (otherwise the <source> would get an empty srcset)
+                    $atLeastOneWebp = true;
+                    $sourceTagAttributes[$attrName . 'set'] = $srcWebP;
+                }
+            }
+        }
+
+        if ($sizesInfo['value']) {
+            $sourceTagAttributes[$sizesInfo['attrName']] = $sizesInfo['value'];
+        }
+
+        if (!$atLeastOneWebp) {
+            // We have no webps for you, so no reason to create <picture> tag
+            return $imgTag;
+        }
+
+        return '<picture>'
+            . '<source' . self::createAttributes($sourceTagAttributes) . ' type="image/webp">'
+            . '<img' . self::createAttributes($imgAttributes) . '>'
+            . '</picture>';
+    }
+
+    /**
+     * Regex callback: store an existing <picture> tag and hand back a placeholder token for it.
+     *
+     * @param  array  $content  regex match; $content[0] is the complete <picture> tag
+     * @return string  the token, ie "PICTURE_TAG_0_"
+     */
+    public function removePictureTagsTemporarily($content)
+    {
+        $this->existingPictureTags[] = $content[0];
+        return 'PICTURE_TAG_' . (count($this->existingPictureTags) - 1) . '_';
+    }
+
+    /**
+     * Regex callback: swap a placeholder token for the <picture> tag it stands for.
+     *
+     * @param  array  $content  regex match; $content[0] is the token and $content[1] its number
+     * @return string  the stored <picture> tag. If nothing is stored under the number, the text
+     *                 was not a token of ours, and it is returned as it was
+     */
+    public function insertPictureTagsBack($content)
+    {
+        $numberInt = intval($content[1]);
+        if (!isset($this->existingPictureTags[$numberInt])) {
+            return $content[0];
+        }
+        return $this->existingPictureTags[$numberInt];
+    }
+
+    /**
+     * Replace the <img> tags in a piece of HTML with <picture> tags.
+     * <img> tags inside existing <picture> tags are left alone.
+     *
+     * @param  string  $content  HTML
+     * @return string  the altered HTML. If one of the regex operations fails (ie because a PCRE limit
+     *                 is hit on a very large document), the HTML is returned without the replacements
+     *                 rather than being lost
+     */
+    public function replaceHtml($content)
+    {
+        if (!class_exists('\\DOMDocument') && function_exists('mb_detect_encoding')) {
+            // PS: Correctly identifying Windows-1251 encoding only works on some systems
+            //     But at least I'm not aware of any false positives
+            if (mb_detect_encoding($content, ["ASCII", "UTF8", "Windows-1251"]) == 'Windows-1251') {
+                $content = mb_convert_encoding($content, 'UTF-8', 'Windows-1251');
+            }
+        }
+
+        // What we fall back to, should a regex operation fail (preg_replace_callback returns null then)
+        $fallback = $content;
+
+        $this->existingPictureTags = [];
+
+        // Temporarily remove existing <picture> tags
+        $content = preg_replace_callback(
+            '/<picture[^>]*>.*?<\/picture>/is',
+            array($this, 'removePictureTagsTemporarily'),
+            $content
+        );
+        if ($content === null) {
+            return $fallback;
+        }
+
+        // Replace "<img>" tags
+        $content = preg_replace_callback('/<img[^>]*>/i', array($this, 'replaceCallback'), $content);
+        if ($content === null) {
+            return $fallback;
+        }
+
+        // Re-insert <picture> tags that was removed
+        $content = preg_replace_callback('/PICTURE_TAG_(\d+)_/', array($this, 'insertPictureTagsBack'), $content);
+        if ($content === null) {
+            return $fallback;
+        }
+
+        return $content;
+    }
+
+    /**
+     * Main replacer function.
+     *
+     * Instantiates the class it is called on ("new static()"), so child classes can be used
+     * the same way: MyPictureTags::replace($html)
+     *
+     * @param  string  $html
+     * @return string  see replaceHtml()
+     */
+    public static function replace($html)
+    {
+        $pt = new static();
+        return $pt->replaceHtml($html);
+    }
+}