4 Commits

Author SHA1 Message Date
c392af46e7 Merge pull request 'Update for FW switching to sending Binary instead of raw html' (#3) from Fix-fw-binary into main
All checks were successful
Create Release / build (push) Successful in 3s
Reviewed-on: #3
2025-04-22 17:50:27 +00:00
1bdae9a815 Update for FW switching to sending Binary instead of raw html 2025-04-22 10:48:19 -07:00
be7db52eec Update DOM crawler to resolve PHP warning
All checks were successful
Create Release / build (push) Successful in 3s
2025-01-26 12:30:03 -08:00
jknapp
059cc94063 Merge pull request 'updating docs and settings for release' (#1) from update-documentation into main
All checks were successful
Create Release / build (push) Successful in 2s
Reviewed-on: CyberCoveLLC/fourth-wall-embed-wp#1
2025-01-11 03:58:05 +00:00

View File

@@ -4,9 +4,50 @@ function fwembed_parse_html($url = null) {
if ($url === null) { if ($url === null) {
throw new ValueError("Missing URL"); throw new ValueError("Missing URL");
} }
$ch = curl_init();
// More complete browser-like headers
$headers = [
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language: en-US,en;q=0.5',
'Connection: keep-alive',
'Upgrade-Insecure-Requests: 1',
'Cache-Control: max-age=0'
];
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_ENCODING, "");
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0');
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // Only if necessary for testing
curl_setopt($ch, CURLOPT_COOKIEJAR, '/tmp/cookies.txt'); // Store cookies
curl_setopt($ch, CURLOPT_COOKIEFILE, '/tmp/cookies.txt'); // Use cookies
$html_content = curl_exec($ch);
if (curl_errno($ch)) {
$error = curl_error($ch);
curl_close($ch);
return "Error fetching URL: " . $error;
}
$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($status_code == 403) {
curl_close($ch);
return "Access forbidden (403). The website may be blocking automated requests.";
}
curl_close($ch);
$html = null; $html = null;
libxml_use_internal_errors(true);
$dom = new DOMDocument(); $dom = new DOMDocument();
@$dom->loadHTML(file_get_contents($url)); @$dom->loadHTML(loadHTML5($html_content), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
$dom->documentURI = $url; $dom->documentURI = $url;
$divs = $dom->getElementsByTagName('div'); $divs = $dom->getElementsByTagName('div');
foreach ($divs as $div) { foreach ($divs as $div) {
@@ -43,9 +84,14 @@ function fwembed_parse_html($url = null) {
$html = $html . '<div class="product-tile"><a class="product-link" target="_blank" href="' . $url . $linkHref . '">' . $productHTML . '</a></div>'; $html = $html . '<div class="product-tile"><a class="product-link" target="_blank" href="' . $url . $linkHref . '">' . $productHTML . '</a></div>';
} }
} }
libxml_clear_errors();
return $html; return $html;
} }
/**
 * Wrap the given markup in a minimal HTML document skeleton.
 *
 * The result is the literal doctype/html/body opening tags, followed by
 * the supplied markup, followed by the matching closing tags. The markup
 * itself is not inspected or modified.
 *
 * @param string $html Markup to place between the <body> tags.
 * @return string The wrapped markup.
 */
function loadHTML5($html) {
    $opening = '<!DOCTYPE html><html><body>';
    $closing = '</body></html>';

    return $opening . $html . $closing;
}
function fwembed_shortcode( $atts ) { function fwembed_shortcode( $atts ) {
$options = get_option( 'fourthwall_settings_name' ); $options = get_option( 'fourthwall_settings_name' );
$value = isset( $options['fourth_url'] ) ? $options['fourth_url'] : 'https://fourthwall.com'; $value = isset( $options['fourth_url'] ) ? $options['fourth_url'] : 'https://fourthwall.com';