<?
/**
 * Rate how trustworthy a link looks by subtracting penalties from a starting score.
 *
 * Penalties applied (after the scheme and one trailing "/" have been removed):
 *   - scheme: "https" 0, "http" -1, anything else (including no scheme) -2
 *   - a "/" anywhere in the remaining link: -1
 *   - a "%" anywhere in the remaining link: -1
 *   - a "&#" anywhere in the remaining link: -2
 *   - a "." located after the last "/" (e.g. "host/file.ext"): -5
 *   - a known TLD label directly followed by "." or "/" and then another
 *     known TLD label further in the link (e.g. "bank.com.evil.ru"): -4, once
 *
 * Note: the TLD heuristic also fires on legitimate second-level registries
 * such as "example.co.uk"; that is inherent to the rule, not an accident.
 *
 * @param string $link  The link to rate, with or without a "scheme://" prefix.
 * @param int    $weigh The starting score.
 * @return int The starting score minus all penalties that apply.
 */
function assess($link,$weigh){
    // Built once per request: known TLD labels (without the leading dot) as array keys.
    static $known_tlds = null;
    if ($known_tlds === null) {
        $tld_list = 'ac,ad,ae,aero,af,ag,ai,al,am,an,ao,aq,ar,as,at,au,aw,az,ba,bb,bd,be,bf,bg,bh,bi,biz,bj,bm,bn,bo,br,bs,bt,'
            . 'bv,bw,by,bz,ca,cat,cc,cd,cf,cg,ch,ci,ck,cl,cm,cn,co,com,coop,cr,cs,cu,cv,cx,cy,cz,de,dj,dk,dm,do,dz,ec,edu,ee,eg,'
            . 'eh,er,es,et,eu,fi,fj,fk,fm,fo,fr,ga,gb,gd,ge,gf,gg,gh,gi,gl,gm,gn,gov,gp,gq,gr,gs,gt,gu,gw,gy,hk,hm,hn,hr,ht,hu,id,'
            . 'ie,il,im,in,info,int,io,iq,ir,is,it,je,jm,jo,jobs,jp,ke,kg,kh,ki,km,kn,kp,kr,kw,ky,kz,la,lb,lc,li,lk,lr,ls,lt,lu,'
            . 'lv,ly,ma,mc,md,mg,mh,mil,mk,ml,mm,mn,mo,mobi,mp,mq,mr,ms,mt,mu,museum,mv,mw,mx,my,mz,na,name,nc,ne,net,nf,ng,ni,nl,no,'
            . 'np,nr,nu,nz,om,org,pa,pe,pf,pg,ph,pk,pl,pm,pn,pr,ps,pt,pw,py,qa,re,ro,ru,rw,sa,sb,sc,sd,se,sg,sh,si,sj,sk,sl,sm,'
            . 'sn,so,sr,st,su,sv,sy,sz,tc,td,tf,tg,th,tj,tk,tm,tn,to,tp,tr,travel,tt,tv,tw,tz,ua,ug,uk,um,us,uy,uz,va,vc,ve,vg,'
            . 'vi,vn,vu,wf,ws,ye,yt,yu,za,zm,zr,zw';
        $known_tlds = array_flip(explode(',', $tld_list));
    }

    $link = (string) $link;

    // Split "scheme://rest"; from here on only the part after the scheme is rated.
    $proto = '';
    $place = strpos($link, '://');
    if ($place !== false) {
        // Schemes are case-insensitive, so compare in lower case.
        $proto = strtolower((string) substr($link, 0, $place));
        $link = (string) substr($link, $place + 3);
    }

    // Delete a single trailing "/" so "host/" is rated exactly like "host".
    if (substr($link, -1) === '/') {
        $link = (string) substr($link, 0, -1);
    }

    // Decrease weigh for protocol not so secure
    if ($proto === 'https') {
        // Nothing to subtract for https.
    } elseif ($proto === 'http') {
        $weigh = $weigh - 1;
    } else {
        $weigh = $weigh - 2;
    }

    // strpos() can legitimately return 0, so always compare against false.
    // Decrease weigh for a "/" in URI
    if (strpos($link, '/') !== false) {
        $weigh = $weigh - 1;
    }
    // Decrease weigh for a "%" in URI
    if (strpos($link, '%') !== false) {
        $weigh = $weigh - 1;
    }
    // Decrease weigh for a "&#" in URI
    if (strpos($link, '&#') !== false) {
        $weigh = $weigh - 2;
    }

    // Decrease weigh for a "." after the last "/" in URI, which is not normal.
    // Without any "/" the dots belong to the host name and are not penalised.
    $last_slash = strrpos($link, '/');
    $last_dot = strrpos($link, '.');
    if ($last_slash !== false && $last_dot !== false && $last_dot > $last_slash) {
        $weigh = $weigh - 5;
    }

    // Decrease weigh for a TLD extension followed by another TLD extension further in the link.
    // Whole dot-prefixed labels are compared, so ".co" does not match inside ".com".
    if (preg_match_all('/\.([a-z0-9-]+)/i', $link, $labels, PREG_OFFSET_CAPTURE)) {
        $leading_tld_seen = false;
        foreach ($labels[1] as $hit) {
            $label = strtolower($hit[0]);
            if (!isset($known_tlds[$label])) {
                continue;
            }
            if ($leading_tld_seen) {
                $weigh = $weigh - 4;
                break;
            }
            // The first TLD only counts when something follows it as "." or "/".
            $next_char = substr($link, $hit[1] + strlen($hit[0]), 1);
            if ($next_char === '.' || $next_char === '/') {
                $leading_tld_seen = true;
            }
        }
    }

    return $weigh;
}
/**
 * Check a link for characters that are never accepted.
 *
 * Forbidden characters:
 *   - ";", " " and "*"
 *   - "@"  user authentication is forbidden because it is often used for phishing
 *   - "'" and '"'  neither has to be used in a main page address
 *
 * @param string $link The link to inspect.
 * @return string "BAD LINK" when a forbidden character is present, "" otherwise.
 */
function detect_error ($link){
    $forbidden = "; *@'\"";

    // strpbrk() returns false only when none of the characters occurs, so a
    // forbidden character at offset 0 is detected as well.
    if (strpbrk((string) $link, $forbidden) !== false) {
        return "BAD LINK";
    }

    return "";
}
/**
 * Reduce a link to its harmless leading part.
 *
 * The link is trimmed and lower-cased, then cut just before the first of the
 * special characters "/", "&", "?", "@" or "%". A leading "scheme://" prefix is
 * kept as is, so its own slashes do not trigger the cut.
 *
 * Examples:
 *   "HTTP://Www.Example.com/Path?x=1" gives "http://www.example.com"
 *   "http://www.paypal.com@evil.example" gives "http://www.paypal.com"
 *
 * @param string $link The link to clean.
 * @return string The scheme (if any) followed by everything up to the first special character.
 */
function sanitize($link){
    $link = strtolower(trim((string) $link)); // clean input
    $set = '/&?@%'; // set with special chars

    // Keep "scheme://" only when it really is a prefix, i.e. when the first
    // special character of the link is the "/" right after the ":".
    $prefix = '';
    $rest = $link;
    $place = strpos($link, '://');
    if ($place !== false && strcspn($link, $set) === $place + 1) {
        $prefix = (string) substr($link, 0, $place + 3);
        $rest = (string) substr($link, $place + 3);
    }

    // If a special char is used, we stop the uri before this char.
    // strcspn() gives the length of the leading run free of special chars;
    // this also removes a trailing "/".
    $position = strcspn($rest, $set);

    return $prefix . (string) substr($rest, 0, $position);
}
// Main
//////////////////////////////////////////////////////
// FOLLOWING VALUES CAN BE CHANGED
// Before we assess the link, we assign this value to the link consistency.
// CHANGE THIS VALUE IF REQUIRED
$initial_weigh=10;
// This is the value we MUST obtain after assessing the link: when the link
// consistency is at least this value we follow the link as given, when it is
// less we sanitize the link and follow the sanitized version.
$test_passed_weigh=8;
//////////////////////////////////////////////////////

// Read the submitted link; a missing or non-string field is treated as empty.
$link_to_go_to = '';
if (isset($_POST['link_to_go_to']) && is_string($_POST['link_to_go_to'])) {
    $link_to_go_to = $_POST['link_to_go_to'];
}

//print headers
print "<html>";
if ($link_to_go_to === '' || !strcmp(detect_error($link_to_go_to),"BAD LINK")) {
    // Suspicious or empty link: only warn, never redirect automatically,
    // otherwise the warning would vanish after one second.
    print "<body><center>Your link seems to be made of error(s). Please take caution before using it</center></body>";
}
else {
    $link_assessment = assess($link_to_go_to, $initial_weigh);

    // Everything below comes from user input, so escape it for HTML output.
    $rating = htmlspecialchars($link_assessment . " / " . $initial_weigh, ENT_QUOTES, 'UTF-8');

    if ($link_assessment < $test_passed_weigh) {
        $sanitized_link = htmlspecialchars(sanitize($link_to_go_to), ENT_QUOTES, 'UTF-8');
        print "<head><META http-equiv=\"refresh\" content=\"1;URL=" . $sanitized_link . "\"></head>";
        print "<body><center>Your link has been rated " . $rating
            . "<BR> You'll be redirected in 1 second to a sanitized link : "
            . $sanitized_link . "</center></body>";
    }
    else {
        $escaped_link = htmlspecialchars($link_to_go_to, ENT_QUOTES, 'UTF-8');
        print "<head><META http-equiv=\"refresh\" content=\"1;URL=" . $escaped_link . "\"></head>";
        print "<body><center>Your link has been rated " . $rating
            . "<BR> You'll be redirected in 1 second</center></body>";
    }
}
//print footers
print "</html>";
?>