- Home
- Categorie
- Coding e Sistemistica
- Coding
- Estrarre urls da una pagina
Estrarre urls da una pagina
Ho trovato questo script sulla rete, che promette di estrarre tutti gli URL da un indirizzo internet, ma non funziona.
Chi di voi è in grado di correggerlo?<?php
/**
 * Find every http/https/file/ftp URL that appears in a chunk of text.
 *
 * A URL is the scheme, a colon, and the shortest run of URL characters that is
 * followed by optional trailing punctuation and then a non-URL character (or
 * the end of the text). Trailing punctuation such as the full stop closing a
 * sentence is therefore not included in the match.
 *
 * @param string $html Text (typically an HTML page) to scan.
 * @return array List of matched URLs in the order they appear; empty if none.
 */
function extractUrlsFromText($html)
{
    $urls = '(https?|file|ftp)';
    $ltrs = '\w';
    $gunk = '/#~:.?+=&%@!\-';
    $punc = '.:?\-';
    $any  = $ltrs . $gunk . $punc;

    // Brace delimiters are used because "/", "#" and "~" all occur inside the
    // character classes and would otherwise terminate the pattern early.
    $pattern = '{\b' . $urls . ':[' . $any . ']+?(?=[' . $punc . ']*[^' . $any . ']|$)}';

    if (!preg_match_all($pattern, (string) $html, $matches)) {
        return array();
    }

    return $matches[0];
}

// Read the "url" field (not the whole superglobal) from whichever request
// method was used; a missing or non-string value is treated as "nothing submitted".
$request = (getenv('REQUEST_METHOD') == 'POST') ? $_POST : $_GET;
$url = (isset($request['url']) && is_string($request['url'])) ? trim($request['url']) : '';

// $PHP_SELF only exists with register_globals; take it from $_SERVER and
// escape it because it is echoed into HTML attributes below.
$self = htmlspecialchars($_SERVER['PHP_SELF'] ?? '', ENT_QUOTES, 'UTF-8');
?>
<form action="<?= $self ?>" method="POST">
URL:<input type="text" name="url" value="<?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?>"/><input type="submit">
</form>
<?php
if ($url !== '') {
    // The URL comes straight from the visitor: only fetch remote schemes so
    // the form cannot be used to read local files from the server.
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));

    if (!in_array($scheme, array('http', 'https', 'ftp'), true)) {
        echo "Unsupported URL scheme<br>\n";
    } else {
        // Read up to 1 MB. A single fread() on a network stream returns only
        // the first chunk, so let file_get_contents() loop until the limit.
        // "@" silences the warning; the failure is reported explicitly below.
        $html = @file_get_contents($url, false, null, 0, 1048576);

        if ($html === false) {
            echo "Unable to read the URL<br>\n";
        } else {
            $found = extractUrlsFromText($html);

            printf("Output of URLs %d URLs<br>\n", count($found));
            foreach ($found as $u) {
                // Each result links back to this script so it can be crawled in turn.
                $link = $self . '?url=' . urlencode($u);
                echo "<a href='$link'>" . htmlspecialchars($u, ENT_QUOTES, 'UTF-8') . "</a><br>\n";
            }
        }
    }
}
Prova questo costruttore:
/**
 * Collects the href targets of <a> tags found in an HTML string, file or URL.
 *
 * Links accumulate across successive parse calls on the same instance. Each
 * instance keeps its own list, so creating a second extractor does not wipe
 * the results of the first.
 */
class LinkExtractor
{
    /**
     * Patterns used to find links. In every pattern capture group 2 is the
     * href value: single-quoted, double-quoted and unquoted respectively.
     * Matching stays inside one tag ("[^>]") and requires whitespace after
     * "<a" so that tags such as <abbr> are not mistaken for anchors.
     *
     * @var array
     */
    public $linkReg = array(
        '/<a(\s(?:[^>]*?\s)?)href\s*=\s*\'([^\']+)\'/i',
        '/<a(\s(?:[^>]*?\s)?)href\s*=\s*"([^"]+)"/i',
        '/<a(\s(?:[^>]*?\s)?)href\s*=\s*([^\s"\'>]+)/i',
    );

    /**
     * Links found so far, already escaped with htmlspecialchars().
     *
     * @var array
     */
    private $links = array();

    /**
     * Start with an empty list of links.
     */
    public function __construct()
    {
        $this->links = array();
    }

    /**
     * Run every pattern in $linkReg over the text and store the hrefs found.
     *
     * Results are grouped by pattern (all single-quoted hrefs, then all
     * double-quoted ones, then unquoted ones), each group in document order.
     *
     * @param string $st Text to scan.
     * @return void
     */
    private function collectLinks($st)
    {
        foreach ($this->linkReg as $pattern) {
            if (!preg_match_all($pattern, (string) $st, $found) || !isset($found[2])) {
                continue;
            }
            foreach ($found[2] as $href) {
                $this->links[] = htmlspecialchars($href, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            }
        }
    }

    /**
     * Read a remote page or a local file and collect the links it contains.
     *
     * @param string $url URL or path to read.
     * @return bool true if the resource could be read, false otherwise.
     */
    public function parseUrl($url)
    {
        // "@" keeps the method's contract: failure is reported through the
        // boolean return value rather than through a PHP warning.
        $content = @file_get_contents($url);
        if ($content === false) {
            return false;
        }

        $this->collectLinks($content);

        return true;
    }

    /**
     * Collect the links contained in a file; same as parseUrl().
     *
     * @param string $st Path of the file to read.
     * @return bool true if the file could be read, false otherwise.
     */
    public function parseFile($st)
    {
        return $this->parseUrl($st);
    }

    /**
     * Collect the links contained in a string.
     *
     * @param string $st HTML text to scan.
     * @return void
     */
    public function parseString($st)
    {
        $this->collectLinks($st);
    }

    /**
     * Return every link collected so far.
     *
     * @return array List of escaped href values (empty if none were found).
     */
    public function getLinks()
    {
        return $this->links;
    }
}

/**
 * Convenience wrapper: extract the links from a string, or from a URL/file.
 *
 * @param string $what HTML text, or the URL/path to read when $url is true.
 * @param bool   $url  true to treat $what as a URL or path, false as HTML text.
 * @return array|false List of links, or false if the URL/path could not be read.
 */
function linkExtractor($what, $url = false)
{
    $extractor = new LinkExtractor();

    if ($url) {
        if (!$extractor->parseUrl($what)) {
            return false;
        }
    } else {
        $extractor->parseString($what);
    }

    return $extractor->getLinks();
}

// EXAMPLE
$url = "http://www.php.net/"; // site to parse
$arrayLinks = linkExtractor($url, true);
if ($arrayLinks !== false) {
    foreach ($arrayLinks as $link) {
        echo $link . " ";
    }
}