[php] pobieranie rekordów wyszukiwania

[php] pobieranie rekordów wyszukiwania, w

yta Zobacz profil	25.03.2011, 18:06:04 Post #1
Grupa: Zarejestrowani Postów: 201 Pomógł: 3 Dołączył: 10.04.2010 Ostrzeżenie: (0%)	Witam mam problem ze skryptem PHP który fizycznie miałby pobierać adresy URL wyszukiwania chodzi mi o pobranie do bazy danych wszystkich rekordów z wyszukiwania site:domena czyli tego : http://www.google.pl/#sclient=psy&hl=p...d516b91cedd4b71 . Znalazłem na pewnej stronie taki skrypt : plik config.php [PHP] pobierz, plaintext
<?php
// Proxy switch: use either 'yes' or 'no'.
$use_proxy = 'no';

if ($use_proxy == "yes") {
    // Please test the proxy and make sure it works before using it in the config fields below.

    // Use format ip:port, e.g. 202.106.121.134:80
    // (more proxies: http://www.samair.ru/proxy/time-01.htm).
    $proxy_ip = '124.153.75.31:80';

    // Use format user:pass. Some proxies don't need user/pass; in that case set $proxy_user = '';
    $proxy_user = 'user:pass';
}
?>
[PHP] pobierz, plaintext url_harvester.php [PHP] pobierz, plaintext
<?php
/*
 * URL harvester: submits a search query to Yahoo / Google / Google Blog Search,
 * pages through the results and prints every result URL, one per line.
 *
 * Reads $use_proxy (and, when it is 'yes', $proxy_ip / $proxy_user) from config.php.
 * Without a submitted form it renders the query form instead.
 */
include("config.php");

/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url        Fully built request URL.
 * @param string $proxy_ip   Proxy as "ip:port", or '' to connect directly.
 * @param string $proxy_user Proxy credentials as "user:pass", or '' when the proxy needs none.
 *
 * @return string|false Response body, or false when the request failed or returned nothing.
 */
function harvester_fetch($url, $proxy_ip = '', $proxy_user = '')
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3");
    // Body only: with headers included the XML/RSS parsers below receive
    // "HTTP/1.1 200 OK ..." in front of the document and fail.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    if ($proxy_ip != '') {
        curl_setopt($ch, CURLOPT_PROXY, $proxy_ip);
        if ($proxy_user != '') {
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxy_user);
        }
    }
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
    $response = curl_exec($ch);
    curl_close($ch);

    // Treat an empty body the same as a transport failure.
    return $response ? $response : false;
}

// config.php defines the proxy variables only when $use_proxy is 'yes';
// normalise them to '' so the helper can be called unconditionally.
$harvester_proxy_ip = '';
$harvester_proxy_user = '';
if (isset($use_proxy) && $use_proxy == "yes" && isset($proxy_ip)) {
    $harvester_proxy_ip = $proxy_ip;
    $harvester_proxy_user = isset($proxy_user) ? $proxy_user : '';
}

// Check if form has been submitted (isset avoids an undefined-index notice on first load).
if (isset($_POST['submit'])) {
    set_time_limit(0); // no time-outs!

    // Collect XML parse errors quietly; a failed parse is handled explicitly below.
    libxml_use_internal_errors(true);

    // This will allow you to view errors in the browser.
    // Note: set display_errors to 0 in production.
    // ini_set('display_errors', 1);
    // error_reporting(E_ALL);

    $engine = isset($_POST["engine"]) ? $_POST["engine"] : '';
    $raw_query = isset($_REQUEST['query']) ? $_REQUEST['query'] : '';
    $query = urlencode($raw_query);

    if ($engine == "yahoo") {
        // Substitute this application ID with your own application ID provided by Yahoo!.
        $appID = "7mOnTDvV34GdHdNm9XPcb6Ms_lbhz8hKyylyUJVY8pva..UnfTCTaw31kRoAQ1vi";

        // 10 pages of 100 results: start = 1, 101, ..., 901.
        for ($start = 1; $start < 902; $start += 100) {
            // URI used for making the REST call.
            $request = "http://search.yahooapis.com/WebSearchService/V1/webSearch?query=$query&appid=$appID&output=xml&start=$start&results=100";

            $response = harvester_fetch($request, $harvester_proxy_ip, $harvester_proxy_user);
            if (!$response) {
                die('Request to Yahoo! Search Service failed and no response was returned.');
            }

            // Parse from the XML declaration onwards; skip anything in front of it.
            $xml = strstr($response, '<?xml');
            $simple_xml = ($xml !== false) ? simplexml_load_string($xml) : false;

            // Unparseable or empty page: nothing more to harvest.
            if ($simple_xml === false || !isset($simple_xml->Result)) {
                break;
            }

            foreach ($simple_xml->Result as $result) {
                echo htmlspecialchars((string) $result->Url, ENT_QUOTES, 'UTF-8') . "<br>";
            }
        }

        echo "<br><br>Results finished or limit of 1000 results reached...<br>";
    } elseif ($engine == "google_blog") {
        echo "Query 3: " . htmlspecialchars($query, ENT_QUOTES, 'UTF-8') . "<br>";

        $num = 0;
        // Stop at 1000 results (the limit stated in the form) or at the first empty page.
        for ($start = 0; $start < 1000; $start += 100) {
            $request = 'http://blogsearch.google.com/blogsearch_feeds?hl=en&q=' . $query . '&ie=utf-8&num=100&start=' . $start . '&output=rss';

            $response = harvester_fetch($request, $harvester_proxy_ip, $harvester_proxy_user);
            if (!$response) {
                die('Request to Google Blog Search failed and no response was returned.');
            }

            $xml = simplexml_load_string($response);
            if ($xml === false || !isset($xml->channel->item)) {
                break;
            }

            // Loop through the feed and pull out the URLs, counting them as we go.
            foreach ($xml->channel->item as $item) {
                $num = $num + 1;
                echo htmlspecialchars((string) $item->link, ENT_QUOTES, 'UTF-8') . " <br>";
            }

            // Random pause between pages to avoid hammering the service.
            sleep(rand(10, 20));
        }

        echo "<br><br>Results number: $num<br>";
    } elseif ($engine == "google_site") {
        $num = 0;
        for ($start = 0; $start < 1000; $start += 100) {
            // num must match the paging step of 100.
            $request = "http://www.google.com/search?hl=en&start=$start&num=100&q=$query";

            $response = harvester_fetch($request, $harvester_proxy_ip, $harvester_proxy_user);
            if (!$response) {
                die('Request to Google Search failed and no response was returned.');
            }

            // Capture the whole href of each result heading.
            // NOTE(review): this depends on the result-page markup (h3 class=r); verify against a live page.
            preg_match_all('#<h3 class="?r"?[^>]*>\s*<a href="([^"]+)"[^>]*>.*?</a>\s*</h3>#si', $response, $matches);
            if (count($matches[1]) == 0) {
                break;
            }

            foreach ($matches[1] as $resultlink) {
                // The href comes from HTML and may already contain entities, so do not double-encode.
                echo htmlspecialchars(strip_tags($resultlink), ENT_QUOTES, 'UTF-8', false) . "<br>";
                $num = $num + 1;
            }

            sleep(rand(10, 20));
        }

        echo "<br><br>Results number: $num<br>";
    }
} else {
?>
<br /> <br />
<fieldset>
<legend> <label for="query">Write your search query, Results are powered by Yahoo/Google with maximum number of results is 1000 per Yahoo/Google terms.</label> </legend>
<form method=POST>
<label for="query">Query: </label><br />
<input type="text" size="150" id="query" name="query"/><br /><br />
Search Engine: <select name="engine">
<option value="yahoo">Yahoo Site Search</option>
<option value="google_site">Google Site Search</option>
<option value="google_blog">Google Blog Search</option>
</select>     <input type=submit name="submit" value="Submit Query" />
</form>
</fieldset>
<br /><br />
<center>Powered by <a title="Scripteen Free URL Harvester" href="http://www.scripteen.com">Scripteen Free URL Harvester</a></center>
<?php } ?>
[PHP] pobierz, plaintext Problem jest taki : A) jak dodać jemu zapis linkow / rekordow pod wskazaną bazę SQL ? jak dodać jemu sprawdzanie czy adres IP jest zbanowany (jeżeli tak pobiera nowy z pliku txt jak nowy zbanują to kolejny itd) C) czy ten skrypt poradzi sobie ze zapisem wszystkich odnośników ? Podbijam zależy mi na czasie.. Pomoże ktoś ? bd wdzięczny REF pomoże ktoś ?