Here is my canonical function, which I changed a bit so that it passes PCI scans (XSS issue), along with a few other minor changes:
// remove duplicate content with canonical tag by Spooks 12/2009
/**
 * Echo a <link rel="canonical"> tag for the current request so that URL
 * variants differing only in tracking/sorting/session parameters collapse
 * onto a single canonical URL.
 *
 * The canonical URL is built from HTTP_SERVER / HTTPS_SERVER plus the request
 * URI with the configured parameters stripped. The URI is HTML-escaped before
 * it is written into the href attribute, because it is attacker-controlled.
 *
 * @param bool   $xhtml True to close the tag XHTML-style (" />"); false for " >".
 * @param string $ssl   Pass 'SSL' to use HTTPS_SERVER when the request itself is SSL;
 *                      any other value always uses HTTP_SERVER.
 *
 * @return false|null False when no tag is emitted (script name cannot be determined,
 *                    the page is on the ignore list, no request URI is available, or
 *                    the URI clean-up fails); otherwise the tag is echoed and nothing
 *                    is returned.
 */
function CanonicalLink( $xhtml = false, $ssl = 'SSL' ) {
    global $request_type;

    // When true, "index.html" (plus trailing slashes) is stripped from the URI.
    // NOTE(review): the original comment said "index.php" while the pattern has
    // always targeted index.html; the home-page check below still expects
    // "/catalog/index.php" to survive, so the pattern is kept as it was - confirm.
    $rem_index = true;
    $close_tag = ( false === $xhtml ? ' >' : ' />' );
    $domain    = ( $request_type === 'SSL' && $ssl === 'SSL' ? HTTPS_SERVER : HTTP_SERVER ); // base URI

    // Find the script basename: prefer SCRIPT_NAME, fall back to PHP_SELF, and
    // only accept a value that really ends in ".php" (SCRIPT_NAME can point at php-cgi).
    $basefile = false;
    foreach ( array( 'SCRIPT_NAME', 'PHP_SELF' ) as $server_key ) {
        if ( array_key_exists( $server_key, $_SERVER ) ) {
            $candidate = basename( $_SERVER[$server_key] );
            if ( substr( $candidate, -4 ) === '.php' ) {
                $basefile = $candidate;
                break;
            }
        }
    }
    if ( false === $basefile ) {
        // No base file, so no canonical link can be produced.
        return false;
    }

    // Don't produce canonicals for SSL pages that bots shouldn't see
    // (partial match against the script filename).
    $ignore_array = array( 'account', 'address', 'checkout', 'login', 'password', 'logoff' );
    if ( preg_match( '/' . implode( '|', $ignore_array ) . '/', $basefile ) ) {
        return false;
    }

    // REQUEST_URI usually doesn't exist on Windows servers (sometimes ORIG_PATH_INFO doesn't either).
    if ( array_key_exists( 'REQUEST_URI', $_SERVER ) ) {
        $request_uri = $_SERVER['REQUEST_URI'];
    } elseif ( array_key_exists( 'ORIG_PATH_INFO', $_SERVER ) ) {
        $request_uri = $_SERVER['ORIG_PATH_INFO'];
    } else {
        // Without a request URI there is nothing to canonicalise.
        return false;
    }

    // Parameters removed on every page.
    $remove_array = array( 'currency', 'language', 'main_page', 'page', 'sort', 'ref', 'affiliate_banner_id', 'max', 'gclid' );

    // Parameters removed only on specific pages; add further entries in the same format.
    $page_remove_array = array(
        FILENAME_PRODUCT_INFO           => array( 'manufacturers_id', 'cPath', 'reviews_id', 'keywords', 'gclid', 'filter_id', 'inc_subcat', 'pfrom', 'pto', 'dfrom', 'dto', 'fl' ),
        FILENAME_DEFAULT                => array( 'sort', 'filter_id', 'src', 'OVRAW', 'OVKEY', 'OVMTC', 'OVADID', 'OVKWID', 'ysmwa' ),
        FILENAME_CATEGORIES             => array( 'manufacturers_id', 'cPath', 'reviews_id', 'keywords', 'gclid', 'filter_id' ),
        FILENAME_PRODUCT_REVIEWS        => array( 'manufacturers_id', 'cPath', 'keywords', 'gclid', 'filter_id' ),
        FILENAME_ADVANCED_SEARCH_RESULT => array( 'manufacturers_id', 'cPath', 'keywords', 'gclid', 'filter_id', 'x', 'y', 'inc_subcat', 'categories_id', 'pfrom', 'pto', 'dto', 'dfrom' ),
        FILENAME_ADVANCED_SEARCH        => array( 'manufacturers_id', 'cPath', 'keywords', 'gclid', 'filter_id' )
    );

    // isset() guard: most pages have no page-specific list, and reading a
    // missing key would raise an undefined-index notice.
    if ( isset( $page_remove_array[$basefile] ) && is_array( $page_remove_array[$basefile] ) ) {
        $remove_array = array_merge( $remove_array, $page_remove_array[$basefile] );
    }

    // One pattern per parameter: optional leading "&", the name, "=" or "/"
    // (SEO-style URLs), then the value characters and an optional trailing slash.
    $search = array();
    foreach ( $remove_array as $value ) {
        $search[] = '/&*' . preg_quote( $value, '/' ) . '[=\/]+[\w%.+\-\[\]]*\/?/i';
    }
    $search[] = '/&*osCsid.*/'; // session id and everything after it
    $search[] = '/\?\z/';       // a "?" left dangling at the end
    if ( $rem_index ) {
        $search[] = '/index\.html\/*/';
    }

    $stripped = preg_replace( $search, '', $request_uri );
    if ( null !== $stripped ) {
        // Removing the first parameter can leave "?&"; collapse it to "?".
        $stripped = preg_replace( '/\?&/', '?', $stripped );
    }
    if ( null === $stripped ) {
        // PCRE failure: emit nothing rather than a broken canonical.
        return false;
    }
    $request_uri = $stripped;

    // Home page handling: modify if you do not use a sub folder, or if the
    // cart system lives under a different name.
    if ( $request_uri === '/catalog/' || $request_uri === '/catalog/index.php' ) {
        echo '<link rel="canonical" href="' . $domain . '"' . $close_tag . PHP_EOL;
        return;
    }

    // XSS fix: the URI is attacker-controlled and lands inside an HTML
    // attribute, so encode every character that could break out of it.
    // strtr() with an array works in a single pass, so the "&" introduced by
    // one replacement is never re-encoded by another.
    $request_uri = strtr( $request_uri, array(
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&#39;',
        '(' => '&#40;',
        ')' => '&#41;'
    ) );

    echo '<link rel="canonical" href="' . $domain . $request_uri . '"' . $close_tag . PHP_EOL;
}
///
Nice function, by the way. I would highly recommend adding it to all shops to remove duplicate-content issues.
cheers