- Home
- Categorie
- Coding e Sistemistica
- PHP
- Estrarre urls da una pagina
-
Estrarre urls da una pagina
Ho trovato questo script sulla rete, che promette di estrarre tutti gli URL da un indirizzo internet, ma non funziona.
Chi di voi è in grado di correggerlo?<?php
/**
 * Collect every absolute URL (http, https, file or ftp scheme) found in a text.
 *
 * @param string $html Text or markup to scan.
 * @return array Matched URLs in order of appearance; duplicates are kept.
 */
function extractUrlsFromText($html)
{
    $urls = '(?:https?|file|ftp)'; // accepted schemes
    $ltrs = '\w';                  // word characters
    $gunk = '/#~:.?+=&%@!\-';      // other characters allowed inside a URL
    $punc = '.:?\-';               // punctuation that may simply end a sentence
    $any  = $ltrs . $gunk . $punc;

    // Word boundary, scheme, colon, then as few URL characters as possible
    // until the URL is followed by optional punctuation plus a non-URL
    // character, or by the end of the text. Trailing sentence punctuation
    // is therefore left out of the match.
    $pattern = '{\b' . $urls . ':[' . $any . ']+?(?=[' . $punc . ']*[^' . $any . ']|$)}';

    if (!preg_match_all($pattern, (string) $html, $matches)) {
        return array();
    }

    return $matches[0];
}

// Read only the "url" field of the submitted form (POST) or query string (GET).
$isPost = isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] === 'POST';
$source = $isPost ? $_POST : $_GET;
$url = (isset($source['url']) && is_string($source['url'])) ? trim($source['url']) : '';

// PHP_SELF may carry attacker-controlled path info, so escape it before output.
$self = htmlspecialchars(
    isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : '',
    ENT_QUOTES,
    'UTF-8'
);
?><form action="<?= $self ?>" method="POST">
URL:<input type="text" name="url" value="<?= htmlspecialchars($url, ENT_QUOTES, 'UTF-8') ?>"/><input type="submit">
</form><?php
if ($url !== '') {
    // Only remote schemes are fetched: passing raw user input to fopen()
    // would otherwise expose local files (file://, plain paths, php://).
    // NOTE(review): internal hosts are still reachable; add a host allow-list
    // if this page is ever exposed publicly.
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
    $remote = in_array($scheme, array('http', 'https', 'ftp'), true) ? fopen($url, 'r') : false;

    if ($remote === false) {
        echo "Unable to read the requested URL<br>\n";
    } else {
        // stream_get_contents() keeps reading until 1 MB or EOF; a single
        // fread() on a network stream returns only the first packet.
        $html = stream_get_contents($remote, 1048576);
        fclose($remote);

        $found = extractUrlsFromText($html === false ? '' : $html);

        printf("Output of URLs %d URLs<br>\n", count($found));
        foreach ($found as $u) {
            // Each result links back to this page so it can be crawled in turn.
            $link = $self . '?url=' . urlencode($u);
            echo "<a href='" . $link . "'>" . htmlspecialchars($u, ENT_QUOTES, 'UTF-8') . "</a><br>\n";
        }
    }
}
?>grazie
-
Prova questo costruttore:
/**
 * Extracts the href targets of anchor tags from a string, a file or a remote page.
 *
 * Each instance keeps its own list of links. Links are stored HTML-escaped,
 * ready to be echoed into a page.
 */
class LinkExtractor
{
    /**
     * Patterns used to find links; capture group 2 must hold the href value.
     *
     * They are applied one after another (single-quoted, double-quoted, then
     * unquoted hrefs), and every match is removed from the working copy of
     * the text before the next pattern runs. Collected links are therefore
     * grouped by quoting style rather than listed in document order.
     *
     * The first group is limited to a single tag ([^>]) and must start with
     * whitespace, so a match can neither run across several tags nor start
     * at <abbr>, <area>, etc.
     *
     * @var array
     */
    public $linkReg = array(
        '/<a(\s[^>]*?)href\s*=\s*\'([^\']+)\'/i',
        '/<a(\s[^>]*?)href\s*=\s*"([^"]+)"/i',
        '/<a(\s[^>]*?)href\s*=\s*([^\s"\'>]+)/i'
    );

    /**
     * Links collected so far by this instance.
     *
     * @var array
     */
    private $links = array();

    /**
     * Create an extractor with an empty link list.
     */
    public function __construct()
    {
        $this->links = array();
    }

    /**
     * Callback for preg_replace_callback(): store the matched href value.
     *
     * @internal Only meant to be called by __callBackCaller().
     * @param array $replacement Match array; index 2 is the href value.
     * @return string Always an empty string, so the matched text is removed
     *                and cannot be matched again by a later pattern.
     */
    public function __manageLinkRecipient($replacement)
    {
        $this->links[] = htmlspecialchars($replacement[2], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        return '';
    }

    /**
     * Run every pattern of $linkReg over the text, collecting the links.
     *
     * @internal
     * @param string $st Text to parse.
     * @return void
     */
    public function __callBackCaller($st)
    {
        preg_replace_callback($this->linkReg, array($this, '__manageLinkRecipient'), (string) $st);
    }

    /**
     * Read a remote page or a local file and collect its links.
     *
     * @param string $url URL or path accepted by fopen().
     * @return bool true if the resource was read, false otherwise.
     */
    public function parseUrl($url)
    {
        // "@" is kept on purpose: failure is reported through the boolean
        // return value, as callers of this method expect.
        $fp = @fopen($url, 'r');
        if ($fp === false) {
            return false;
        }

        // Reads to EOF; a "while ($text = fread(...))" loop would stop early
        // on any chunk that evaluates to false, such as the string "0".
        $st = stream_get_contents($fp);
        fclose($fp);

        if ($st === false) {
            return false;
        }

        $this->__callBackCaller($st);

        return true;
    }

    /**
     * Collect the links of a file; alias of parseUrl().
     *
     * @param string $st Path of the file to parse.
     * @return bool true if the file was read, false otherwise.
     */
    public function parseFile($st)
    {
        return $this->parseUrl($st);
    }

    /**
     * Collect the links contained in a string.
     *
     * @param string $st Text to parse.
     * @return void
     */
    public function parseString($st)
    {
        $this->__callBackCaller($st);
    }

    /**
     * Return every link collected so far.
     *
     * @return array HTML-escaped href values; empty if none were found.
     */
    public function getLinks()
    {
        return $this->links;
    }
}

/**
 * Convenience wrapper: extract the links of a string or of a remote page.
 *
 * @param string $what Text to parse, or the URL/path to read when $url is true.
 * @param bool   $url  true to treat $what as a URL or path, false to parse it as text.
 * @return array|false List of links, or false when the URL could not be read.
 */
function linkExtractor($what, $url = false)
{
    $myLinks = new LinkExtractor();

    if ($url) {
        if (!$myLinks->parseUrl($what)) {
            return false;
        }
    } else {
        $myLinks->parseString($what);
    }

    return $myLinks->getLinks();
}

// EXAMPLE
$url = "http://www.php.net/"; // site to parse
$arrayLinks = linkExtractor($url, true);
if (is_array($arrayLinks)) {
    foreach ($arrayLinks as $foundLink) {
        echo $foundLink . " ";
    }
}