1 | <?php
|
---|
2 |
|
---|
3 | /*************************************************
|
---|
4 |
|
---|
5 | Snoopy - the PHP net client
|
---|
6 | Author: Monte Ohrt <monte@ispi.net>
|
---|
7 | Copyright (c): 1999-2000 ispi, all rights reserved
|
---|
8 | Version: 1.0
|
---|
9 |
|
---|
10 | * This library is free software; you can redistribute it and/or
|
---|
11 | * modify it under the terms of the GNU Lesser General Public
|
---|
12 | * License as published by the Free Software Foundation; either
|
---|
13 | * version 2.1 of the License, or (at your option) any later version.
|
---|
14 | *
|
---|
15 | * This library is distributed in the hope that it will be useful,
|
---|
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
18 | * Lesser General Public License for more details.
|
---|
19 | *
|
---|
20 | * You should have received a copy of the GNU Lesser General Public
|
---|
21 | * License along with this library; if not, write to the Free Software
|
---|
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
---|
23 |
|
---|
24 | You may contact the author of Snoopy by e-mail at:
|
---|
25 | monte@ispi.net
|
---|
26 |
|
---|
27 | Or, write to:
|
---|
28 | Monte Ohrt
|
---|
29 | CTO, ispi
|
---|
30 | 237 S. 70th suite 220
|
---|
31 | Lincoln, NE 68510
|
---|
32 |
|
---|
33 | The latest version of Snoopy can be obtained from:
|
---|
34 | http://snoopy.sourceforge.net
|
---|
35 |
|
---|
36 | *************************************************/
|
---|
37 |
|
---|
38 | class Snoopy
|
---|
39 | {
|
---|
/**** Public variables ****/

/* user definable vars */

// host name we are connecting to
var $host = "www.php.net";
// port we are connecting to
var $port = 80;
// proxy host to use
var $proxy_host = "";
// proxy port to use
var $proxy_port = "";
// agent we masquerade as
var $agent = "Snoopy v1.0";
// referer info to pass
var $referer = "";
// array of cookies to pass, e.g. $cookies["username"]="joe";
var $cookies = array();
// array of raw headers to send, e.g. $rawheaders["Content-type"]="text/html";
var $rawheaders = array();

// http redirection depth maximum. 0 = disallow
var $maxredirs = 5;
// address of the last redirect that was followed
var $lastredirectaddr = "";
// allows redirection off-site
var $offsiteok = true;
// frame content depth maximum. 0 = disallow
var $maxframes = 0;
// expand links to fully qualified URLs.
// this only applies to fetchlinks() or submitlinks()
var $expandlinks = true;
// pass set cookies back through redirects
// NOTE: this currently does not respect dates, domains or paths.
var $passcookies = true;

// user for http authentication
var $user = "";
// password for http authentication
var $pass = "";

// http accept types
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

// where the content is put
var $results = "";

// error messages sent here
var $error = "";
// response code returned from server
var $response_code = "";
// headers returned from server sent here
var $headers = array();
// max return data length (body)
var $maxlength = 500000;
// timeout on read operations, in seconds
// supported only since PHP 4 Beta 4
// set to 0 to disallow timeouts
var $read_timeout = 0;
// if a read operation timed out
var $timed_out = false;
// http request status
var $status = 0;

// Snoopy will use cURL for fetching SSL content if a full system path
// to the cURL binary is supplied here. Set to false if you do not have
// cURL installed. See http://curl.haxx.se for details on installing cURL.
// Snoopy does *not* use the cURL library functions built into php,
// as these functions are not stable as of this Snoopy release.
var $curl_path = "/usr/bin/curl";

// send Accept-encoding: gzip?
var $use_gzip = true;

/**** Private variables ****/

// max line length (headers)
var $_maxlinelen = 4096;

// default http request method
var $_httpmethod = "GET";
// default http request version
var $_httpversion = "HTTP/1.0";
// default submit method
var $_submit_method = "POST";
// default submit type
var $_submit_type = "application/x-www-form-urlencoded";
// MIME boundary for multipart/form-data submit type
var $_mime_boundary = "";
// will be set if page fetched is a redirect
var $_redirectaddr = false;
// increments on an http redirect
var $_redirectdepth = 0;
// frame src urls
var $_frameurls = array();
// increments on frame depth
var $_framedepth = 0;

// set if using a proxy server
var $_isproxy = false;
// timeout for socket connection
var $_fp_timeout = 30;
115 |
|
---|
116 | /*======================================================================*\
|
---|
117 | Function: fetch
|
---|
118 | Purpose: fetch the contents of a web page
|
---|
119 | (and possibly other protocols in the
|
---|
120 | future like ftp, nntp, gopher, etc.)
|
---|
121 | Input: $URI the location of the page to fetch
|
---|
122 | Output: $this->results the output text from the fetch
|
---|
123 | \*======================================================================*/
|
---|
124 |
|
---|
/**
 * Fetch the contents of a web page over http or https.
 *
 * The response body ends up in $this->results (written by _httprequest /
 * _httpsrequest). Redirects are followed up to $this->maxredirs and frame
 * sources up to $this->maxframes by calling fetch() recursively.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false if the scheme is unsupported, the socket connection
 *              fails, or (https) the curl binary is not executable;
 *              true otherwise
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();   // seriously malformed URI: falls through to "Invalid protocol"

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
    $host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";

    // request target used when talking to the origin server directly
    $path = (isset($URI_PARTS["path"]) && $URI_PARTS["path"] !== "") ? $URI_PARTS["path"] : "/";
    if (isset($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];

    switch ($scheme) {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $fp = null;
            if (!$this->_connect($fp))
                return false;

            // a proxy needs the entire URI, the origin server only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path || !is_executable($this->curl_path)) {
                $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                return false;
            }
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }

    // url was redirected: follow it unless the max depth has been reached
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // on-site means same host over http or https; the lookahead stops
        // "host.evil.com" from passing as a prefix match of "host"
        $onsite = preg_match(
            '|^https?://'.preg_quote($this->host, '|').'(?![^:/?#])|i',
            $this->_redirectaddr
        );
        if ($onsite || $this->offsiteok) {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    // fetch frame sources collected by the request, up to the frame depth limit
    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
258 |
|
---|
259 |
|
---|
260 |
|
---|
261 | /*======================================================================*\
|
---|
262 | Private functions
|
---|
263 | \*======================================================================*/
|
---|
264 |
|
---|
265 |
|
---|
266 | /*======================================================================*\
|
---|
267 | Function: _striplinks
|
---|
268 | Purpose: strip the hyperlinks from an html document
|
---|
269 | Input: $document document to strip.
|
---|
270 | Output: $match an array of the links
|
---|
271 | \*======================================================================*/
|
---|
272 |
|
---|
/**
 * Collect the href targets of the anchors in an html document.
 *
 * Quoted href values are returned first, followed by unquoted ones
 * (the two come from different capture groups of the same pattern).
 *
 * @param string $document document to scan
 * @return array the link targets; an empty array when none are found
 */
function _striplinks($document)
{
    preg_match_all("'<\s*a\s+.*href\s*=\s*      # find <a href=
                    ([\"\'])?                   # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                # quote, otherwise match up to next space
                    'isx", $document, $links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones
    $match = array();
    foreach (array(2, 3) as $group) {
        foreach ($links[$group] as $val) {
            if (!empty($val))
                $match[] = $val;
        }
    }

    return $match;
}
299 |
|
---|
300 | /*======================================================================*\
|
---|
301 | Function: _stripform
|
---|
302 | Purpose: strip the form elements from an html document
|
---|
303 | Input: $document document to strip.
|
---|
304 | Output: $match the matched form elements, joined by CRLF
|
---|
305 | \*======================================================================*/
|
---|
306 |
|
---|
/**
 * Extract the form-related tags (form, input, select, textarea, option)
 * from an html document.
 *
 * @param string $document document to scan
 * @return string the matched elements joined with CRLF
 */
function _stripform($document)
{
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($pattern, $document, $elements);

    // the full matches are the form elements themselves
    return implode("\r\n", $elements[0]);
}
317 |
|
---|
318 |
|
---|
319 |
|
---|
320 | /*======================================================================*\
|
---|
321 | Function: _striptext
|
---|
322 | Purpose: strip the text from an html document
|
---|
323 | Input: $document document to strip.
|
---|
324 | Output: $text the resulting text
|
---|
325 | \*======================================================================*/
|
---|
326 |
|
---|
/**
 * Reduce an html document to its text: drops script blocks and tags,
 * collapses whitespace that follows a line break, and decodes a small
 * set of common entities.
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
function _striptext($document)
{
    // Entities are listed one by one. The patterns are applied in order,
    // so &amp; must come last: decoding it earlier would turn "&amp;lt;"
    // into "&lt;" and then wrongly into "<".
    $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                    "'([\r\n])[\s]+'",                  // strip out white space
                    "'&(quot|quote|#34);'i",            // replace html entities ("quote" kept for old input)
                    "'&(lt|#60);'i",
                    "'&(gt|#62);'i",
                    "'&(nbsp|#160);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i",
                    "'&(amp|#38);'i"
                    );
    $replace = array("",
                     "",
                     "\\1",
                     "\"",
                     "<",
                     ">",
                     " ",
                     chr(161),
                     chr(162),
                     chr(163),
                     chr(169),
                     "&");

    $text = preg_replace($search, $replace, $document);

    return $text;
}
364 |
|
---|
365 | /*======================================================================*\
|
---|
366 | Function: _expandlinks
|
---|
367 | Purpose: expand each link into a fully qualified URL
|
---|
368 | Input: $links the links to qualify
|
---|
369 | $URI the full URI to get the base from
|
---|
370 | Output: $expandedLinks the expanded links
|
---|
371 | \*======================================================================*/
|
---|
372 |
|
---|
/**
 * Expand each link into a fully qualified URL.
 *
 * - links that already carry a scheme (http:, https:, mailto:, ...) are
 *   left as they are
 * - links starting with "/" are resolved against scheme://host of $URI
 * - everything else is resolved against the directory of $URI
 * Afterwards one "/./" and one "/segment/../" sequence is collapsed, as before.
 *
 * @param string|array $links the link or links to qualify
 * @param string $URI the full URI to get the base from
 * @return string|array the expanded link(s), same shape as $links
 */
function _expandlinks($links,$URI)
{
    // base is the URI without its query string; nothing to resolve against otherwise
    if (!preg_match("/^[^\?]+/", $URI, $match))
        return $links;
    $base = $match[0];

    // split off scheme://authority first so the file-name strip below can
    // only touch the path (it used to eat two-label hosts like example.com)
    if (preg_match('|^([a-z][a-z0-9+.\-]*://[^/]+)(.*)$|i', $base, $parts)) {
        $root = $parts[1];
        $path = $parts[2];
    } else {
        $root = "";
        $path = $base;
    }

    // drop a trailing "name.ext" segment, then a trailing slash, to get the directory
    $path = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $path);
    $path = preg_replace("|/$|", "", $path);
    $dir = $root.$path;

    $single = !is_array($links);
    $list = $single ? array((string)$links) : $links;

    $expandedLinks = array();
    foreach ($list as $key => $link) {
        if (preg_match('|^[a-z][a-z0-9+.\-]*:|i', $link)) {
            // already absolute: keep untouched
        } elseif (substr($link, 0, 1) == "/") {
            // root-relative
            $link = $root.$link;
        } else {
            // relative to the current directory
            $link = $dir."/".$link;
        }

        $expandedLinks[$key] = preg_replace(array("|/\./|", "|/[^\/]+/\.\./|"), "/", $link);
    }

    return $single ? $expandedLinks[0] : $expandedLinks;
}
396 |
|
---|
397 | /*======================================================================*\
|
---|
398 | Function: _httprequest
|
---|
399 | Purpose: go get the http data from the server
|
---|
400 | Input: $url the url to fetch
|
---|
401 | $fp the current open file pointer
|
---|
402 | $URI the full URI
|
---|
403 | $body body contents to send if any (POST)
|
---|
404 | Output:
|
---|
405 | \*======================================================================*/
|
---|
406 |
|
---|
/**
 * Send an http request over an open socket and read the response.
 *
 * Fills $this->headers, $this->status, $this->response_code and
 * $this->results, sets $this->_redirectaddr when the response redirects
 * (Location/URI header or meta refresh) and queues frame sources in
 * $this->_frameurls while the frame depth limit allows it.
 *
 * @param string $url the request target (path, or full URI through a proxy)
 * @param resource $fp the current open file pointer
 * @param string $URI the full URI
 * @param string $http_method request method, e.g. GET or POST
 * @param string $content_type value for the Content-type header, if any
 * @param string $body body contents to send, if any (POST)
 * @return bool false if a read timed out (status is set to -100), true otherwise
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "http";

    if (empty($url))
        $url = "/";
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if (!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if (!empty($this->host) && !isset($this->rawheaders['Host']))
        $headers .= "Host: ".$this->host."\r\n";
    if (!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";

    if ($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if (function_exists('gzinflate')) {
            $headers .= "Accept-encoding: gzip\r\n";
        } else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support.".
                "  Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }

    if (!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal)
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first element, so the raw headers
        // are also sent on follow-up requests (redirects, frames)
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if (!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if (!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp, $headers.$body, strlen($headers.$body));

    $this->_redirectaddr = false;
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        // a blank line ends the header section
        if (preg_match("/^\r?\n$/", $currentHeader))
            break;

        // if a header begins with Location: or URI:, set the redirect
        if (preg_match("/^(Location|URI):\s*(\S.*)/i", chop($currentHeader), $matches)) {
            $location = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $location)) {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $location))
                    $this->_redirectaddr .= "/".$location;
                else
                    $this->_redirectaddr .= $location;
            } else {
                $this->_redirectaddr = $location;
            }
        }

        if (preg_match("|^HTTP/|", $currentHeader)) {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
                $this->status = $status[1];
            $this->response_code = $currentHeader;
        }

        // header names are case-insensitive
        if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader))
            $is_gzipped = true;

        $this->headers[] = $currentHeader;
    }

    $results = "";
    while ($data = fread($fp, $this->maxlength)) {
        $results .= $data;
        if (strlen($results) > $this->maxlength)
            break;
    }

    // gunzip (only possible when zlib is available)
    if ($is_gzipped && function_exists('gzinflate')) {
        // per http://www.php.net/manual/en/function.gzencode.php:
        // skip the 10 byte gzip header, the rest is a raw deflate stream
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        // results starts out as a string; [] on a string is an error
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for ($x = 0; $x < count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
580 |
|
---|
581 | /*======================================================================*\
|
---|
582 | Function: _httpsrequest
|
---|
583 | Purpose: go get the https data from the server using curl
|
---|
584 | Input: $url the url to fetch
|
---|
585 | $URI the full URI
|
---|
586 | $body body contents to send if any (POST)
|
---|
587 | Output:
|
---|
588 | \*======================================================================*/
|
---|
589 |
|
---|
/**
 * Fetch an https URI by running the external curl binary and parse the
 * response.
 *
 * Fills $this->headers, $this->status, $this->response_code and
 * $this->results, sets $this->_redirectaddr when the response redirects
 * and queues frame sources in $this->_frameurls.
 *
 * Every value placed on the command line is passed through
 * escapeshellarg(), because headers, body and URI can carry
 * caller-supplied or server-supplied text.
 *
 * @param string $url the request target (unused by curl, which gets $URI)
 * @param string $URI the full URI
 * @param string $http_method request method (curl picks GET/POST itself)
 * @param string $content_type value for the Content-type header, if any
 * @param string $body body contents to send, if any (POST)
 * @return bool false if the temp file cannot be created or curl exits
 *              non-zero ($this->error is set), true otherwise
 */
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "https";

    // GET ... header not needed for curl
    if (!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if (!empty($this->host))
        $headers[] = "Host: ".$this->host;
    if (!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if (!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if (!empty($this->cookies)) {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal)
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
    }
    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if (!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if (!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // build the curl arguments, each one quoted on its own
    $cmdline_params = "";
    foreach ($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if (!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if ($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

    // curl dumps the response headers into this file
    $tmpdir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "/tmp";
    $headerfile = tempnam($tmpdir, "snoopy");
    if ($headerfile === false) {
        $this->error = "Error: could not create a temporary file for the response headers.";
        return false;
    }

    // -k: accept self-signed certs (note: this disables certificate verification)
    $cmdline_params .= " -k";
    exec(
        escapeshellarg($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),
        $results,
        $return
    );

    // read the headers and remove the temp file on every path
    $result_headers = $return ? false : file($headerfile);
    if (file_exists($headerfile))
        unlink($headerfile);

    if ($return) {
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }
    if (!is_array($result_headers))
        $result_headers = array();

    $results = implode("\r\n", $results);

    $this->_redirectaddr = false;
    $this->headers = array();

    foreach ($result_headers as $currentHeader) {
        // if a header begins with Location: or URI:, set the redirect
        if (preg_match("/^(Location|URI):\s*(\S.*)/i", chop($currentHeader), $matches)) {
            $location = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $location)) {
                // no host in the path, so prepend; curl connected to the port
                // named in $URI (if any), so that is the port to keep
                $this->_redirectaddr = $scheme."://".$this->host;
                if (!empty($URI_PARTS["port"]))
                    $this->_redirectaddr .= ":".$URI_PARTS["port"];
                // eliminate double slash
                if (!preg_match("|^/|", $location))
                    $this->_redirectaddr .= "/".$location;
                else
                    $this->_redirectaddr .= $location;
            } else {
                $this->_redirectaddr = $location;
            }
        }

        if (preg_match("|^HTTP/|", $currentHeader)) {
            $this->response_code = $currentHeader;
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match))
                $this->status = $match[1];
        }
        $this->headers[] = $currentHeader;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
        // results starts out as a string; [] on a string is an error
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for ($x = 0; $x < count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
731 |
|
---|
732 | /*======================================================================*\
|
---|
733 | Function: setcookies()
|
---|
734 | Purpose: set cookies for a redirection
|
---|
735 | \*======================================================================*/
|
---|
736 |
|
---|
/**
 * Copy the cookies from the Set-Cookie headers of the last response
 * into $this->cookies so they are sent along with the next request.
 *
 * Only name and value are kept; attributes after the first ";" are
 * ignored. The value is url-decoded because the request builders
 * url-encode cookie values again when sending.
 *
 * @return void
 */
function setcookies()
{
    // no response has been read yet
    if (empty($this->headers) || !is_array($this->headers))
        return;

    foreach ($this->headers as $header) {
        // stop the value at ";" or at the line ending kept on stored headers
        if (preg_match("/^set-cookie:[\s]*([^=]+)=([^;\r\n]+)/i", $header, $match))
            $this->cookies[$match[1]] = urldecode(rtrim($match[2]));
    }
}
745 |
|
---|
746 |
|
---|
747 | /*======================================================================*\
|
---|
748 | Function: _check_timeout
|
---|
749 | Purpose: checks whether timeout has occurred
|
---|
750 | Input: $fp file pointer
|
---|
751 | \*======================================================================*/
|
---|
752 |
|
---|
/**
 * Tell whether the last read on the socket timed out.
 *
 * Does nothing unless a read timeout is configured. Sets
 * $this->timed_out when a timeout is detected.
 *
 * @param resource $fp file pointer
 * @return bool true if the stream reports a timeout, false otherwise
 */
function _check_timeout($fp)
{
    // timeouts disabled: never report one
    if (!($this->read_timeout > 0))
        return false;

    $stream_info = socket_get_status($fp);
    if (!$stream_info["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
764 |
|
---|
765 | /*======================================================================*\
|
---|
766 | Function: _connect
|
---|
767 | Purpose: make a socket connection
|
---|
768 | Input: $fp file pointer
|
---|
769 | \*======================================================================*/
|
---|
770 |
|
---|
/**
 * Open a socket to the target host, or to the proxy when both
 * proxy_host and proxy_port are set.
 *
 * On failure $this->status holds the errno reported by fsockopen() and
 * $this->error a matching message.
 *
 * @param resource $fp receives the open file pointer (by reference)
 * @return bool true if the connection was opened, false otherwise
 */
function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    } else {
        // reset, otherwise a proxy used on an earlier request would stick
        $this->_isproxy = false;
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch ($errno) {
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (".$errno.")";
    }
    return false;
}
817 | /*======================================================================*\
|
---|
818 | Function: _disconnect
|
---|
819 | Purpose: disconnect a socket connection
|
---|
820 | Input: $fp file pointer
|
---|
821 | \*======================================================================*/
|
---|
822 |
|
---|
/**
 * Close a socket connection.
 *
 * @param resource $fp file pointer
 * @return bool the result of fclose()
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
827 |
|
---|
828 |
|
---|
829 | /*======================================================================*\
|
---|
830 | Function: _prepare_post_body
|
---|
831 | Purpose: Prepare post body according to encoding type
|
---|
832 | Input: $formvars - form variables
|
---|
833 | $formfiles - form upload files
|
---|
834 | Output: post body
|
---|
835 | \*======================================================================*/
|
---|
836 |
|
---|
/**
 * Prepare the post body according to $this->_submit_type.
 *
 * For "multipart/form-data" a fresh boundary is generated and stored in
 * $this->_mime_boundary. Array or object values are sent as repeated
 * "name[]" fields. Unreadable upload files are skipped.
 *
 * @param array $formvars form variables (name => value or list of values)
 * @param array $formfiles form upload files (field name => path or list of paths)
 * @return string the post body; empty when there is nothing to send or the
 *                submit type is not one of the two supported ones
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    $postdata = "";

    if (count($formvars) == 0 && count($formfiles) == 0)
        return $postdata;

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val)
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                } else {
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // {$key}[] keeps the brackets literal without emitting backslashes
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // reads the whole file as-is; also works for empty files
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
898 | }
|
---|
899 |
|
---|
900 | ?>
|
---|