Merge pull request 'Update for FW switching to sending Binary instead of raw html' (#3) from Fix-fw-binary into main
All checks were successful
Create Release / build (push) Successful in 3s
All checks were successful
Create Release / build (push) Successful in 3s
Reviewed-on: #3
This commit is contained in:
commit
c392af46e7
@ -4,10 +4,50 @@ function fwembed_parse_html($url = null) {
|
|||||||
if ($url === null) {
|
if ($url === null) {
|
||||||
throw new ValueError("Missing URL");
|
throw new ValueError("Missing URL");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
$ch = curl_init();
|
||||||
|
|
||||||
|
// More complete browser-like headers
|
||||||
|
$headers = [
|
||||||
|
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||||
|
'Accept-Language: en-US,en;q=0.5',
|
||||||
|
'Connection: keep-alive',
|
||||||
|
'Upgrade-Insecure-Requests: 1',
|
||||||
|
'Cache-Control: max-age=0'
|
||||||
|
];
|
||||||
|
|
||||||
|
curl_setopt($ch, CURLOPT_URL, $url);
|
||||||
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
|
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||||
|
curl_setopt($ch, CURLOPT_ENCODING, "");
|
||||||
|
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
|
||||||
|
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0');
|
||||||
|
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
|
||||||
|
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // Only if necessary for testing
|
||||||
|
curl_setopt($ch, CURLOPT_COOKIEJAR, '/tmp/cookies.txt'); // Store cookies
|
||||||
|
curl_setopt($ch, CURLOPT_COOKIEFILE, '/tmp/cookies.txt'); // Use cookies
|
||||||
|
|
||||||
|
$html_content = curl_exec($ch);
|
||||||
|
|
||||||
|
if (curl_errno($ch)) {
|
||||||
|
$error = curl_error($ch);
|
||||||
|
curl_close($ch);
|
||||||
|
return "Error fetching URL: " . $error;
|
||||||
|
}
|
||||||
|
|
||||||
|
$status_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||||
|
if ($status_code == 403) {
|
||||||
|
curl_close($ch);
|
||||||
|
return "Access forbidden (403). The website may be blocking automated requests.";
|
||||||
|
}
|
||||||
|
|
||||||
|
curl_close($ch);
|
||||||
|
|
||||||
$html = null;
|
$html = null;
|
||||||
libxml_use_internal_errors(true);
|
libxml_use_internal_errors(true);
|
||||||
$dom = new DOMDocument();
|
$dom = new DOMDocument();
|
||||||
@$dom->loadHTML(loadHTML5(file_get_contents($url)), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
|
@$dom->loadHTML(loadHTML5($html_content), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
|
||||||
$dom->documentURI = $url;
|
$dom->documentURI = $url;
|
||||||
$divs = $dom->getElementsByTagName('div');
|
$divs = $dom->getElementsByTagName('div');
|
||||||
foreach ($divs as $div) {
|
foreach ($divs as $div) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user