PHP robots.txt parsing
- by omfgroflmao
Is there an easier way to do this?
/**
 * Download a site's robots.txt and print the rules of every "User-agent: *" section.
 *
 * The host is extracted from $URL and http://<host>/robots.txt is fetched.
 * The text is lower-cased and split on "user-agent:". For each section whose
 * agent is "*", comments are removed and the remaining text is split in two
 * at the first "allow:" directive. The resulting array is written to output
 * with print_r():
 *   [0] => text that preceded the first "allow:" (the disallowed paths),
 *   [1] => text that followed it (present only if an "allow:" was found).
 * The directive keywords themselves are stripped; the paths are left as raw
 * whitespace-separated text and are not split further.
 *
 * Nothing is printed when $URL has no host or when robots.txt cannot be fetched.
 *
 * @param string $URL Any URL on the site whose robots.txt should be inspected.
 * @return void
 */
function parse_robots_txt($URL){
    $parsed = parse_url($URL);

    // parse_url() returns false on seriously malformed input, and omits
    // 'host' for relative or scheme-less URLs; there is nothing to fetch then.
    if (!is_array($parsed) || !isset($parsed['host']) || $parsed['host'] === '') {
        return;
    }

    $robots = file_get_contents('http://'.$parsed['host'].'/robots.txt');

    // file_get_contents() returns false on failure (DNS error, 404, ...).
    if ($robots === false) {
        return;
    }

    // Each chunk after the first begins with the agent name of one section.
    $sections = explode('user-agent:', strtolower($robots));

    foreach ($sections as $user_agent) {
        $user_agent = trim($user_agent);

        // Only the wildcard section is of interest.
        if (substr($user_agent, 0, 1) !== '*') {
            continue;
        }

        // Drop "# ..." comments together with their line break, then remove
        // any '#' that is still left (e.g. a comment on an unterminated last line).
        $user_agent = str_replace('#', '', preg_replace('/#.*\\n/i', '', $user_agent));

        // Strip the leading '*' and every "disallow:" keyword. This must
        // happen before looking for "allow:", because "disallow:" contains it.
        $user_agent = str_replace('disallow:', '', substr($user_agent, 1));

        // Mark the first genuine "allow:" with a separator and delete the rest,
        // so the text can be split into a disallow part and an allow part.
        $user_agent = preg_replace('/allow:/i', '+-+-+-+', $user_agent, 1);
        $user_agent = str_replace('allow:', '', $user_agent);

        print_r(explode('+-+-+-+', $user_agent));
    }
}