I wrote an AJAX routine to load the HTML of a website into a textarea.
The response returned from the PHP script is strange: the whole PHP file gets sent back, source code and all! I displayed it with alert() to see what it was:
Then, when I actually inserted the response into the DIV, the result looked wrong: the textarea tag, the PHP comments, etc. were missing.
JS:
// ============================================================================
// ajax
// args:  string - server script file, with or without parameters appended
//                 to the end
//        string - id of the element that receives the server response
// ret:   boolean - false if no AJAX transport could be created (nothing is
//                  sent); true once the request has been sent
// about: standard AJAX GET roundtrip. The response is inserted into the
//        target element only when the server answers with a 2xx status;
//        any other status is reported with an alert and the element is
//        left untouched.
// ----------------------------------------------------------------------------
function ajax(serverScript, insert)
{
    var xmlHttp = null;

    // Candidate transports, tried in order until one can be constructed.
    var factories = [
        // Firefox, Opera 8.0+, Safari
        function () { return new XMLHttpRequest(); },
        // Internet Explorer 6+
        function () { return new ActiveXObject("Msxml2.XMLHTTP"); },
        // Internet Explorer 5
        function () { return new ActiveXObject("Microsoft.XMLHTTP"); }
    ];

    for (var i = 0; i < factories.length && !xmlHttp; i++)
    {
        try
        {
            xmlHttp = factories[i]();
        }
        catch (e)
        {
            // This transport does not exist in the current browser;
            // fall through to the next candidate.
        }
    }

    if (!xmlHttp)
    {
        alert("Your browser does not support AJAX. Download a newer browser to view this page.");
        return false;
    }

    // insert server response into HTML element
    xmlHttp.onreadystatechange = function ()
    {
        // readyState 4 means the request has finished; ignore earlier states.
        if (xmlHttp.readyState !== 4)
        {
            return;
        }

        // Do not inject the body of an error page (404, 500, ...) as content.
        if (xmlHttp.status < 200 || xmlHttp.status >= 300)
        {
            alert("Request for " + serverScript + " failed (HTTP status " + xmlHttp.status + ").");
            return;
        }

        // NOTE: innerHTML makes the browser parse the response as markup, so
        // the server script must escape any text it wants displayed verbatim.
        document.getElementById(insert).innerHTML = xmlHttp.responseText;
    };

    xmlHttp.open("GET", serverScript, true);
    xmlHttp.send(null);
    return true;
}
// ============================================================================
// loadHtml
// args:  none
// ret:   void
// about: Loads the start page's HTML into a textarea. Reads the URL from the
//        "startPage" input, asks loadHtml.php to fetch it, and has the
//        response inserted into the element with id "html".
// ----------------------------------------------------------------------------
function loadHtml()
{
    var url = document.getElementById('startPage').value;

    // The URL travels as a query-string value, so it must be percent-encoded;
    // otherwise any "&", "#", "+" or space inside it would truncate or
    // corrupt the "url" parameter seen by the server script.
    ajax("loadHtml.php?url=" + encodeURIComponent(url), "html");
}
PHP:
<?php
// title: loadHtml.php
// author: ---
// date: 10/17/08
// about: Given a URL (query parameter "url"), fetches its HTML and returns it
// as escaped text inside a <textarea> element.
// FIX: check for loading of malicious JS scripts (it doesn't mean if a file
// doesn't have a JS extension, then it isn't JS text inside that file).
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

// The parameter may be missing, or an array ("url[]=..."); treat both as empty.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';

// Only fetch http(s) URLs. file_get_contents() also accepts local paths and
// other stream wrappers, so an unchecked value would let a visitor read files
// from the server (including this script's own source).
// NOTE(review): this still allows requests to internal hosts; restrict the
// allowed hosts if this script is reachable by untrusted users.
if (!preg_match('#^https?://#i', $url)) {
    http_response_code(400);
    echo '<textarea>Invalid URL: only http:// and https:// addresses can be loaded.</textarea>';
    exit;
}

// "@" suppresses the PHP warning on failure; the failure itself is handled
// explicitly below.
$html = @file_get_contents($url);
if ($html === false) {
    http_response_code(502);
    echo '<textarea>Could not load the requested URL.</textarea>';
    exit;
}

// Escape the fetched markup so the browser displays it as text inside the
// textarea instead of parsing it; unescaped, a "</textarea>" in the fetched
// page would end the element early and the rest would render as live HTML.
// ENT_SUBSTITUTE keeps pages that are not valid UTF-8 from producing an
// empty string.
echo '<textarea>' . htmlspecialchars($html, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</textarea>';
?>
HTML:
<!-- Start-page form: the user enters a URL and clicks "load" to fetch that page's HTML. -->
<h2>start page</h2>
<i>URL of the page to start scraping</i>
<b>URL:</b>
<!-- loadHtml() reads this field's value by its id, "startPage". -->
<input type="text" name="startPage" id="startPage" />
<!-- Clicking the button calls loadHtml(). -->
<input type="button" value="load" onclick="loadHtml()" />
<b>HTML:</b>
<!-- loadHtml() passes the id "html" to ajax(), which overwrites this div's
     contents (including the placeholder textarea) with the server response. -->
<div id="html">
<textarea>N/A</textarea>
</div>