â?? in my HTML after purify
- by mmcgrail
I have a database that I am rebuilding. The table structure was crap, so I'm porting some of the data from one table to another. This data appears to have been copied and pasted from an MS Office product, so as I'm getting the data I clean it up with HTML Purifier and a little str_replace in PHP. Here is the clean function:
/**
 * Sanitize an HTML fragment with HTML Purifier and strip layout whitespace.
 *
 * Only p, em, strong, a, ul, li, ol and img elements carrying href/src
 * attributes are allowed, and the purifier is asked to remove empty elements.
 * Carriage returns, newlines and tabs are then deleted, paragraphs holding a
 * single space are dropped, and no-break spaces become ordinary spaces.
 *
 * @param string $html Raw HTML fragment.
 * @return string Purified HTML with surrounding whitespace trimmed.
 */
function clean_html($html) {
    $config = HTMLPurifier_Config::createDefault();
    $config->set('AutoFormat','RemoveEmpty',true);
    $config->set('HTML','AllowedAttributes','href,src');
    $config->set('HTML','AllowedElements','p,em,strong,a,ul,li,ol,img');
    $purifier = new HTMLPurifier($config);
    $html = $purifier->purify($html);
    // NOTE(review): as written, this call and the identical one further down
    // search for a space and replace it with a space, so they change nothing.
    // Presumably the originals targeted an entity or a doubled space that was
    // lost in transit -- confirm before editing.
    $html = str_replace(' ',' ',$html);
    $html = str_replace("\r",'',$html);
    $html = str_replace("\n",'',$html);
    $html = str_replace("\t",'',$html);
    $html = str_replace(' ',' ',$html);
    // Drop paragraphs that contain nothing but a single space.
    $html = str_replace('<p> </p>','',$html);
    // The config above keeps HTML Purifier's default encoding (UTF-8), where a
    // no-break space is the two bytes C2 A0. Replacing the lone byte chr(160)
    // would leave an orphaned C2 behind and would also break every other
    // multibyte character that contains an A0 byte, so match the full sequence.
    $html = str_replace("\xC2\xA0",' ',$html);
    return trim($html);
}
But when I put the results into my new table and output them to CKEditor, I get those three characters (â??).
I also have a JavaScript function that is called to remove special characters from the content of the CKEditor. It doesn't clean them up either:
/**
 * Reduce a string to printable ASCII (plus CR and LF).
 *
 * A fixed table of accented letters, Unicode spaces, dashes, typographic
 * quotes and symbols is transliterated to ASCII equivalents first; every
 * character that is still outside 0x20-0x7E (other than "\n" and "\r") is
 * then removed. Accented letters not listed in the table are removed, not
 * transliterated.
 *
 * @param {string} str - Text to clean; null/undefined yields an empty string.
 * @returns {string} The cleaned text.
 */
function remove_special(str) {
  if (str == null) {
    return '';
  }
  str = String(str);

  // [pattern, replacement] pairs, applied in order. Every pattern is a real
  // RegExp with the global flag: a quoted "/.../g" string would be searched
  // for literally and never match, and a pattern without "g" would only
  // replace the first occurrence.
  var replacements = [
    [/[\xC0-\xC2]/g, 'A'], [/[\xE0-\xE2]/g, 'a'],
    [/[\xC8-\xCA]/g, 'E'], [/[\xE8-\xEB]/g, 'e'],
    [/[\xCC-\xCE]/g, 'I'], [/[\xEC-\xEE]/g, 'i'],
    [/[\xD2-\xD4]/g, 'O'], [/[\xF2-\xF4]/g, 'o'],
    [/[\xD9-\xDB]/g, 'U'], [/[\xF9-\xFB]/g, 'u'],
    [/\xD1/g, 'N'], [/\xF1/g, 'n'],
    // Unicode space and line/paragraph separator characters become a plain
    // space. No "|" inside the class: there it would be a literal pipe.
    [/[\u00a0\u1680\u2000-\u200b\u2028\u2029\u202f\u205f\u3000]/g, ' '],
    // Vertical tab maps to a tab, which the final filter below does not
    // keep, so it ends up removed.
    [/\u000b/g, '\t'],
    [/[\u180e\u000c]/g, ''],
    [/\u2013/g, '-'], [/\u2014/g, '--'],
    [/\xa9/g, '(c)'], [/\xae/g, '(r)'], [/\xb7/g, '*'],
    [/\u2018/g, "'"], [/\u2019/g, "'"],
    [/\u201c/g, '"'], [/\u201d/g, '"'],
    [/\u2026/g, '...']
  ];

  for (var i = 0; i < replacements.length; i++) {
    str = str.replace(replacements[i][0], replacements[i][1]);
  }

  // Strip everything that is not printable ASCII, LF or CR in a single pass.
  // Deleting characters one at a time while walking the string by index
  // skips the character that slides into the freed position, so runs of
  // invalid characters would only be half removed.
  return str.replace(/[^\x20-\x7E\n\r]/g, '');
}
Does anyone know offhand what I need to do to get rid of them? I think they may be some sort of quote.