WebP Express CloudHost.es Fix v0.25.9-cloudhost

 Fixed bulk conversion getting stuck on missing files
 Added robust error handling and timeout protection
 Improved JavaScript response parsing
 Added file existence validation
 Fixed missing PHP class imports
 Added comprehensive try-catch error recovery

🔧 Key fixes:
- File existence checks before conversion attempts
- 30-second timeout protection per file
- Graceful handling of 500 errors and JSON parsing issues
- Automatic continuation to next file on failures
- Cache busting for JavaScript updates

🎯 Result: Bulk conversion now completes successfully even with missing files

🚀 Generated with Claude Code (https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-23 10:22:32 +02:00
commit 37cf714058
553 changed files with 55249 additions and 0 deletions

View File

@@ -0,0 +1,247 @@
<?php
namespace DOMUtilForWebP;
//use Sunra\PhpSimple\HtmlDomParser;
use KubAT\PhpSimple\HtmlDomParser;
/**
* Highly configurable class for replacing image URLs in HTML (both src and srcset syntax)
*
* Uses http://simplehtmldom.sourceforge.net/ - a library for easily manipulating HTML by means of a DOM.
* The great thing about this library is that it supports working on invalid HTML and it only applies the changes you
* make - very gently (however, not as gently as we do in PictureTags).
* PS: The library is a bit old, so perhaps we should look for another.
* ie https://packagist.org/packages/masterminds/html5 ??
*
* Behaviour can be customized by overriding the public methods (replaceUrl, $searchInTags, etc)
*
* Default behaviour:
* - The modified URL is the same as the original, with ".webp" appended (replaceUrl)
* - Limits to these tags: <img>, <source>, <input>, <iframe>, <div>, <li>, <link>, <a>, <section> and <video>
*   ($searchInTags)
* - Limits to these attributes: "src", "srcset" and any attribute starting with "data-" (attributeFilter)
* - Only replaces URLs that ends with "png", "jpg" or "jpeg" (no query strings either) (replaceUrl)
*
*
*/
class ImageUrlReplacer
{
    /**
     * Tags whose attributes are searched for image URLs.
     *
     * - div, li (and friends) are on the list because they are often used with lazy loading.
     * - <meta> is deliberately NOT on the list. It is probably not wanted for open graph / twitter images
     *   (<meta property="og:image" content="[url]">, <meta property="og:image:secure_url" content="[url]">,
     *   <meta name="twitter:image" content="[url]">) nor for schema.org micro-formatting
     *   (<meta itemprop="image" content="[url]">).
     * - <link> is on the list with preloaded images in mind (<link rel="prefetch" href="[url]">,
     *   <link rel="preload" as="image" href="[url]">). Note however that only attributes accepted by
     *   attributeFilter() are touched, and the default filter does not accept "href".
     *
     * The property is read with late static binding, so a child class can redeclare it.
     *
     * @var string[]
     */
    public static $searchInTags = ['img', 'source', 'input', 'iframe', 'div', 'li', 'link', 'a', 'section', 'video'];

    /**
     * Empty constructor for preventing child classes from creating constructors.
     *
     * We do this because otherwise the "new static()" call inside the ::replace() method
     * would be unsafe. See #21
     * @return void
     */
    final public function __construct()
    {
    }

    /**
     * Map an image URL to its webp counterpart.
     *
     * The default implementation appends ".webp" to URLs ending in "png", "jpg" or "jpeg"
     * (case sensitive, so URLs with a query string are left alone).
     *
     * No parameter type is declared on purpose: valueless attributes may be handed in as boolean true.
     *
     * @param  mixed $url  the attribute value / URL to examine
     * @return string|null webp url or, if URL should not be changed, null
     */
    public function replaceUrl($url)
    {
        if (!preg_match('#(png|jpe?g)$#', $url)) {
            return null;
        }
        return $url . '.webp';
    }

    /**
     * Like replaceUrl(), but returns a caller supplied value when the URL is denied.
     *
     * @param  mixed $url                  the URL to examine
     * @param  mixed $returnValueIfDenied  returned when replaceUrl() returns null
     * @return mixed the replaced URL, or $returnValueIfDenied
     */
    public function replaceUrlOr($url, $returnValueIfDenied)
    {
        $replaced = $this->replaceUrl($url);
        return (isset($replaced) ? $replaced : $returnValueIfDenied);
    }

    /**
     * Handle an attribute value that holds a single URL.
     *
     * @param  mixed $attrValue
     * @return mixed the replaced URL, or the value unchanged if it was denied
     */
    public function handleSrc($attrValue)
    {
        return $this->replaceUrlOr($attrValue, $attrValue);
    }

    /**
     * Handle an attribute value in srcset syntax, ie "1.jpg 1000w, 2.jpg".
     *
     * Each comma separated entry is a URL optionally followed by descriptors ("520w", "2x").
     * Entries whose URL is accepted by replaceUrl() get the new URL; the descriptors are kept.
     *
     * If no entry is replaced, the value is returned exactly as it was received. This keeps us from
     * reformatting attributes we have no business with (looksLikeSrcSet() is a loose heuristic, so
     * arbitrary "data-" attributes end up here too).
     *
     * @param  string $attrValue
     * @return string
     */
    public function handleSrcSet($attrValue)
    {
        $entries = [];
        $anyReplaced = false;

        foreach (explode(',', $attrValue) as $rawEntry) {
            // $entry is ie "image.jpg 520w" or "image.jpg 2x", but can also lack descriptors, ie just "image.jpg"
            $entry = trim($rawEntry);
            $entryParts = preg_split('/\s+/', $entry, 2);
            if (count($entryParts) == 2) {
                list($src, $descriptors) = $entryParts;
            } else {
                $src = $entry;
                $descriptors = null;
            }

            $webpUrl = $this->replaceUrlOr($src, false);
            if ($webpUrl !== false) {
                $anyReplaced = true;
                $entry = $webpUrl . (isset($descriptors) ? ' ' . $descriptors : '');
            }
            $entries[] = $entry;
        }

        if (!$anyReplaced) {
            return $attrValue;
        }
        // All entries are trimmed at this point, so joining with ", " cannot produce double spaces
        return implode(', ', $entries);
    }

    /**
     * Test if attribute value looks like it has srcset syntax.
     * "image.jpg 100w" does for example. And "image.jpg 1x" and "image.jpg 1.5x".
     * Also "image1.jpg, image2.jpg 1x"
     * Mixing x and w is invalid (according to
     * https://stackoverflow.com/questions/26928828/html5-srcset-mixing-x-and-w-syntax)
     * But we accept it anyway
     * It is not the job of this function to see if the first part is an image URL
     * That will be done in handleSrcSet.
     *
     * @param  mixed $value
     * @return bool
     */
    public function looksLikeSrcSet($value)
    {
        // whitespace, an optional (possibly fractional) number, then "w" or "x"
        return (preg_match('#\s\d*(?:\.\d+)?(w|x)#', $value) === 1);
    }

    /**
     * Replace the URL(s) in an attribute value, whether it holds a single URL or a srcset.
     *
     * The helpers are called through $this, so overrides in child classes are honoured.
     *
     * @param  mixed $value
     * @return mixed
     */
    public function handleAttribute($value)
    {
        if ($this->looksLikeSrcSet($value)) {
            return $this->handleSrcSet($value);
        }
        return $this->handleSrc($value);
    }

    /**
     * Decide whether an attribute should be processed.
     *
     * @param  string $attrName
     * @return bool   true for "src", "srcset" and names starting with "data-" (case insensitive)
     */
    public function attributeFilter($attrName)
    {
        $name = strtolower($attrName);
        return (($name == 'src') || ($name == 'srcset') || (strpos($name, 'data-') === 0));
    }

    /**
     * Callback for the url(...) regex in processCSS().
     *
     * @param  array $matches  [0] whole match, [1] "url(" incl. opening quote, [2] the quote,
     *                         [3] the URL, [4] closing quote and ")"
     * @return string the url(...) expression with the URL replaced (or unchanged, if denied)
     */
    public function processCSSRegExCallback($matches)
    {
        $pre = $matches[1];
        $url = $matches[3];
        $post = $matches[4];
        return $pre . $this->replaceUrlOr($url, $url) . $post;
    }

    /**
     * Replace image URLs inside url(...) expressions of "background" / "background-image" declarations.
     *
     * The CSS is split on ";" and only pieces containing "background:" or "background-image:" are
     * examined; everything else is passed through untouched.
     *
     * @param  string $css  contents of a <style> element or of a style attribute
     * @return string
     */
    public function processCSS($css)
    {
        // https://regexr.com/46qdg
        $urlRegex = '#(url\\s*\\(([\\"\\\']?))([^\\\'\\";\\)]*)(\\2\\s*\\))#';

        $declarations = explode(';', $css);
        foreach ($declarations as $i => $declaration) {
            if (!preg_match('#(background(-image)?)\\s*:#', $declaration)) {
                continue;
            }
            $parts = explode(',', $declaration);
            foreach ($parts as $j => $part) {
                // Bound to $this (rather than a hard-coded class name) so child class overrides are used
                $parts[$j] = preg_replace_callback($urlRegex, [$this, 'processCSSRegExCallback'], $part);
            }
            $declarations[$i] = implode(',', $parts);
        }
        return implode(';', $declarations);
    }

    /**
     * Replace image URLs in a piece of HTML.
     *
     * Processes, in this order: attributes of the tags in $searchInTags (filtered by attributeFilter()),
     * the contents of <style> elements and finally "style" attributes.
     *
     * @param  string $html
     * @return string the altered HTML. If the helper library refuses to parse the HTML, the original
     *                HTML is returned with an explanatory HTML comment prepended.
     */
    public function replaceHtml($html)
    {
        if ($html == '') {
            return '';
        }

        // https://stackoverflow.com/questions/4812691/preserve-line-breaks-simple-html-dom-parser
        // function str_get_html($str, $lowercase=true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET,
        //    $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
        $dom = HtmlDomParser::str_get_html($html, false, true, 'UTF-8', false);

        // MAX_FILE_SIZE is defined in simple_html_dom.
        // For safety sake, we make sure it is defined before using
        defined('MAX_FILE_SIZE') || define('MAX_FILE_SIZE', 600000);

        if ($dom === false) {
            if (strlen($html) > MAX_FILE_SIZE) {
                return '<!-- Alter HTML was skipped because the HTML is too big to process! ' .
                    '(limit is set to ' . MAX_FILE_SIZE . ' bytes) -->' . "\n" . $html;
            }
            return '<!-- Alter HTML was skipped because the helper library refused to process the html -->' .
                "\n" . $html;
        }

        // Replace attributes (src, srcset, data-src, etc)
        // static:: rather than self::, so a child class can redeclare $searchInTags
        foreach (static::$searchInTags as $tagName) {
            foreach ($dom->find($tagName) as $elem) {
                foreach ($elem->getAllAttributes() as $attrName => $attrValue) {
                    if ($this->attributeFilter($attrName)) {
                        $elem->setAttribute($attrName, $this->handleAttribute($attrValue));
                    }
                }
            }
        }

        // Replace <style> elements
        foreach ($dom->find('style') as $elem) {
            $css = $this->processCSS($elem->innertext);
            if ($css != $elem->innertext) {
                $elem->innertext = $css;
            }
        }

        // Replace "style" attributes
        foreach ($dom->find('*[style]') as $elem) {
            $css = $this->processCSS($elem->style);
            if ($css != $elem->style) {
                $elem->style = $css;
            }
        }

        return $dom->save();
    }

    /**
     * Main replacer function.
     *
     * Instantiates the class it is called on ("new static()") and runs replaceHtml() on it.
     *
     * @param  string $html
     * @return string
     */
    public static function replace($html)
    {
        $iur = new static();
        return $iur->replaceHtml($html);
    }
}

View File

@@ -0,0 +1,337 @@
<?php
namespace DOMUtilForWebP;
//use Sunra\PhpSimple\HtmlDomParser;
use KubAT\PhpSimple\HtmlDomParser;
/**
* Class PictureTags - convert an <img> tag to a <picture> tag and add the webp versions of the images
* Code is based on code from the ShortPixel plugin, which in turn used code from Responsify WP plugin
*
* It works like this:
*
* 1. Remove existing <picture> tags and their content - replace with tokens in order to reinsert later
* 2. Process <img> tags.
* - The tags are found with regex.
* - The attributes are parsed with DOMDocument if it exists, otherwise with the Simple Html Dom library,
* which is included inside this library
* 3. Re-insert the existing <picture> tags
*
* This procedure is very gentle and needle-like. No need for a complete parse - so invalid HTML is no big issue
*
* PS:
* https://packagist.org/packages/masterminds/html5
*/
class PictureTags
{
    /**
     * The <picture> tags that were found in the content and temporarily replaced with tokens.
     * The index in this array is the number used in the token ("PICTURE_TAG_[index]_").
     *
     * @var string[]
     */
    private $existingPictureTags;

    /**
     * Empty constructor for preventing child classes from creating constructors.
     *
     * We do this because otherwise the "new static()" call inside the ::replace() method
     * would be unsafe. See #21
     * @return void
     */
    final public function __construct()
    {
        $this->existingPictureTags = [];
    }

    /**
     * Map an image URL to its webp counterpart.
     *
     * The default implementation appends ".webp" to URLs ending in "png", "jpg" or "jpeg".
     *
     * @param  string $url
     * @return string|null webp url or, if URL should not be changed, null
     */
    public function replaceUrl($url)
    {
        if (!preg_match('#(png|jpe?g)$#', $url)) {
            return null;
        }
        return $url . '.webp';
    }

    /**
     * Like replaceUrl(), but returns a caller supplied value when the URL is denied.
     *
     * @param  string $url
     * @param  mixed  $returnValueIfDenied  returned when replaceUrl() returns null
     * @return mixed
     */
    public function replaceUrlOr($url, $returnValueIfDenied)
    {
        $replaced = $this->replaceUrl($url);
        return (isset($replaced) ? $replaced : $returnValueIfDenied);
    }

    /**
     * Look for attributes such as "data-lazy-src" and "data-src" and prefer them over "src"
     *
     * @param  array  $attributes  an array of attributes for the element
     * @param  string $attrName    ie "src", "srcset" or "sizes"
     *
     * @return array  an array with "value" key and "attrName" key. ("value" is the trimmed value of the
     *                attribute and "attrName" is the name of the attribute used). Both are false if
     *                none of the candidates is set and non-empty.
     */
    private static function lazyGet($attributes, $attrName)
    {
        // In order of preference
        foreach (['data-lazy-', 'data-', ''] as $prefix) {
            $name = $prefix . $attrName;
            if (isset($attributes[$name]) && strlen($attributes[$name])) {
                return [
                    'value' => trim($attributes[$name]),
                    'attrName' => $name,
                ];
            }
        }
        return [
            'value' => false,
            'attrName' => false,
        ];
    }

    /**
     * Look for attribute such as "src", but also with prefixes such as "data-lazy-src" and "data-src"
     *
     * @param  array  $attributes  an array of all attributes for the element
     * @param  string $attrName    ie "src", "srcset" or "sizes"
     *
     * @return array  the non-empty attributes found; attribute name => trimmed attribute value
     */
    private static function findAttributesWithNameOrPrefixed($attributes, $attrName)
    {
        $found = [];
        foreach (['', 'data-lazy-', 'data-'] as $prefix) {
            $name = $prefix . $attrName;
            if (isset($attributes[$name]) && strlen($attributes[$name])) {
                $found[$name] = trim($attributes[$name]);
            }
        }
        return $found;
    }

    /**
     * Convert to UTF-8 and encode chars outside of ascii-range
     *
     * Input: html that might be in any character encoding and might contain non-ascii characters
     * Output: html in UTF-8 encoding, where non-ascii characters are encoded as numeric entities
     *
     * If the mbstring functions are unavailable, the html is returned unchanged.
     *
     * @param  string $html
     * @return string
     */
    private static function textToUTF8WithNonAsciiEncoded($html)
    {
        if (function_exists("mb_convert_encoding")) {
            $html = mb_convert_encoding($html, 'UTF-8');
            // The map covers the full unicode range, so characters beyond U+FFFF (ie emojis) are encoded too
            $html = mb_encode_numericentity($html, [0x7f, 0x10ffff, 0, 0x1fffff], 'UTF-8');
        }
        return $html;
    }

    /**
     * Parse the attributes of an <img> tag.
     *
     * Uses DOMDocument if it exists, otherwise the Simple Html Dom library.
     * Note that DOMDocument returns attribute values with HTML entities decoded.
     *
     * @param  string $html  the html of a single tag
     * @return array  attribute name => attribute value. Empty if no <img> element was found.
     */
    private static function getAttributes($html)
    {
        if (class_exists('\\DOMDocument')) {
            $dom = new \DOMDocument();
            if (function_exists("mb_encode_numericentity")) {
                // I'm in doubt if I should add the following line (see #41)
                // $html = mb_convert_encoding($html, 'UTF-8');
                // The map covers the full unicode range, so characters beyond U+FFFF are encoded too
                $html = mb_encode_numericentity($html, [0x7f, 0x10ffff, 0, 0x1fffff]);  // #41
            }
            // Parser warnings are suppressed: we are parsing a fragment, which may well be invalid
            @$dom->loadHTML($html);
            $image = $dom->getElementsByTagName('img')->item(0);
            if ($image === null) {
                // The parser found no <img> element in the fragment
                return [];
            }
            $attributes = [];
            foreach ($image->attributes as $attr) {
                $attributes[$attr->nodeName] = $attr->nodeValue;
            }
            return $attributes;
        }

        // Convert to UTF-8 because HtmlDomParser::str_get_html needs to be told the
        // encoding. As UTF-8 might conflict with the charset set in the meta, we must
        // encode all characters outside the ascii-range.
        // It would perhaps have been better to try to guess the encoding rather than
        // changing it (see #39), but I'm reluctant to introduce changes.
        $html = self::textToUTF8WithNonAsciiEncoded($html);
        $dom = HtmlDomParser::str_get_html($html, false, true, 'UTF-8', false);
        if ($dom !== false) {
            // Only the first <img> is of interest
            foreach ($dom->find('img,IMG') as $elem) {
                $attributes = [];
                foreach ($elem->getAllAttributes() as $attrName => $attrValue) {
                    $attributes[strtolower($attrName)] = $attrValue;
                }
                return $attributes;
            }
        }
        return [];
    }

    /**
     * Makes a string with all attributes.
     *
     * Double quotes in values are escaped, as a raw double quote would terminate the attribute
     * (values parsed with DOMDocument are entity-decoded, so they can contain them).
     * "&" is deliberately left alone: The fallback parser is used with values as they appear in the
     * source, so escaping it could double-encode entities that are already there.
     *
     * @param  array $attribute_array  attribute name => attribute value
     * @return string  ie ' src="a.jpg" alt="text"' (with leading space), or '' if there are no attributes
     */
    private static function createAttributes($attribute_array)
    {
        $rendered = [];
        foreach ($attribute_array as $attribute => $value) {
            $rendered[] = $attribute . '="' . str_replace('"', '&quot;', (string) $value) . '"';
        }
        if (count($rendered) == 0) {
            return '';
        }
        return ' ' . implode(' ', $rendered);
    }

    /**
     * Replace <img> tag with <picture> tag.
     *
     * The tag is returned unchanged when it has already been processed, when it has neither "src"
     * nor "srcset", when it uses a data URL as source, when one of the srcset candidates cannot be
     * converted, or when there is no webp to point to at all.
     *
     * @param  array $match  regex match; [0] is the complete <img> tag
     * @return string
     */
    private function replaceCallback($match)
    {
        $imgTag = $match[0];

        // Do nothing with images that have the 'webpexpress-processed' class.
        if (strpos($imgTag, 'webpexpress-processed') !== false) {
            return $imgTag;
        }

        $imgAttributes = self::getAttributes($imgTag);
        $sizesInfo = self::lazyGet($imgAttributes, 'sizes');
        $srcSetAttributes = self::findAttributesWithNameOrPrefixed($imgAttributes, 'srcset');
        $srcAttributes = self::findAttributesWithNameOrPrefixed($imgAttributes, 'src');

        if ((!isset($srcSetAttributes['srcset'])) && (!isset($srcAttributes['src']))) {
            // better not mess with this html...
            return $imgTag;
        }

        // add the exclude class so if this content is processed again in other filter,
        // the img is not converted again in picture
        $imgAttributes['class'] = (isset($imgAttributes['class']) ? $imgAttributes['class'] . " " : "") .
            "webpexpress-processed";

        // Process srcset (also data-srcset etc)
        $atLeastOneWebp = false;
        $sourceTagAttributes = [];
        foreach ($srcSetAttributes as $attrName => $attrValue) {
            // Candidates are separated by a comma followed by whitespace, or by a comma directly after
            // a width/density descriptor ("a.jpg 1x,b.jpg 2x"). Other commas are taken to be part of a URL.
            $candidates = preg_split('/\s*,\s+|(?<=\d[wx])\s*,\s*/', $attrValue, -1, PREG_SPLIT_NO_EMPTY);
            $srcsetArrWebP = [];
            foreach ($candidates as $candidate) {
                // $candidate is ie "http://example.com/image.jpg 520w"
                $candidate = trim($candidate);
                $tokens = preg_split('/\s+/', $candidate);
                $src = $candidate;
                $width = null;
                if ($tokens && count($tokens) >= 2) {
                    list($src, $width) = $tokens;
                }

                $webpUrl = $this->replaceUrlOr($src, false);
                if ($webpUrl === false) {
                    // We want ALL of the sizes as webp.
                    // If we cannot have that, it is better to abort! - See #42
                    return $imgTag;
                }
                if (substr($src, 0, 5) != 'data:') {
                    $atLeastOneWebp = true;
                    $srcsetArrWebP[] = $webpUrl . (isset($width) ? ' ' . $width : '');
                }
            }
            $sourceTagAttributes[$attrName] = implode(', ', $srcsetArrWebP);
        }

        foreach ($srcAttributes as $attrName => $attrValue) {
            if (substr($attrValue, 0, 5) == 'data:') {
                // ignore tags with data urls, such as <img src="data:...
                return $imgTag;
            }
            // Make sure not to override existing srcset with src
            if (!isset($sourceTagAttributes[$attrName . 'set'])) {
                $srcWebP = $this->replaceUrlOr($attrValue, false);
                if ($srcWebP !== false) {
                    // Only add the attribute when there is a webp to point to (no empty srcset="")
                    $atLeastOneWebp = true;
                    $sourceTagAttributes[$attrName . 'set'] = $srcWebP;
                }
            }
        }

        if ($sizesInfo['value']) {
            $sourceTagAttributes[$sizesInfo['attrName']] = $sizesInfo['value'];
        }

        if (!$atLeastOneWebp) {
            // We have no webps for you, so no reason to create <picture> tag
            return $imgTag;
        }

        return '<picture>'
            . '<source' . self::createAttributes($sourceTagAttributes) . ' type="image/webp">'
            . '<img' . self::createAttributes($imgAttributes) . '>'
            . '</picture>';
    }

    /**
     * Regex callback: Store an existing <picture> tag and return the token to put in its place.
     *
     * @param  array $content  regex match; [0] is the complete <picture>...</picture> html
     * @return string  the token, ie "PICTURE_TAG_0_"
     */
    public function removePictureTagsTemporarily($content)
    {
        $this->existingPictureTags[] = $content[0];
        return 'PICTURE_TAG_' . (count($this->existingPictureTags) - 1) . '_';
    }

    /**
     * Regex callback: Return the <picture> tag that a token stands for.
     *
     * @param  array $content  regex match; [0] is the token and [1] is the number in it
     * @return string  the stored <picture> html. If the number does not belong to a stored tag
     *                 (the text was part of the page to begin with), the text is returned unchanged.
     */
    public function insertPictureTagsBack($content)
    {
        $index = intval($content[1]);
        if (!isset($this->existingPictureTags[$index])) {
            return $content[0];
        }
        return $this->existingPictureTags[$index];
    }

    /**
     * Replace the <img> tags in the content with <picture> tags that have a webp source.
     *
     * 1. Existing <picture> tags are replaced with tokens, so their <img> tags are left alone
     * 2. <img> tags are replaced
     * 3. The existing <picture> tags are inserted back
     *
     * @param  string $content
     * @return string  the altered content. If one of the regex operations fails, the content is
     *                 returned without replacements rather than being lost.
     */
    public function replaceHtml($content)
    {
        if (!class_exists('\\DOMDocument') && function_exists('mb_detect_encoding')) {
            // PS: Correctly identifying Windows-1251 encoding only works on some systems
            // But at least I'm not aware of any false positives
            if (mb_detect_encoding($content, ["ASCII", "UTF8", "Windows-1251"]) == 'Windows-1251') {
                $content = mb_convert_encoding($content, 'UTF-8', 'Windows-1251');
            }
        }

        $this->existingPictureTags = [];
        $untouched = $content;

        // Temporarily remove existing <picture> tags
        $content = preg_replace_callback(
            '/<picture[^>]*>.*?<\/picture>/is',
            [$this, 'removePictureTagsTemporarily'],
            $content
        );
        if ($content === null) {
            // preg error (ie backtrack limit reached on a huge page)
            return $untouched;
        }

        // Replace "<img>" tags.
        // The lookahead makes sure the tag name really is "img" (and not ie a custom element like <img-slider>)
        $content = preg_replace_callback('/<img(?=[\s\/>])[^>]*>/i', [$this, 'replaceCallback'], $content);
        if ($content === null) {
            return $untouched;
        }

        // Re-insert <picture> tags that was removed
        $content = preg_replace_callback('/PICTURE_TAG_(\d+)_/', [$this, 'insertPictureTagsBack'], $content);
        if ($content === null) {
            return $untouched;
        }

        return $content;
    }

    /**
     * Main replacer function.
     *
     * Instantiates the class it is called on ("new static()") and runs replaceHtml() on it.
     *
     * @param  string $html
     * @return string
     */
    public static function replace($html)
    {
        $pt = new static();
        return $pt->replaceHtml($html);
    }
}