[sourcecode lang="php"]
 // Replace byte sequences that are not acceptable UTF-8 with a single '?'.
 // The pattern has no /u modifier, so it is matched byte by byte and can
 // therefore inspect malformed input. The alternatives, in order:
 //   1. control bytes 0x00-0x08, 0x0B, 0x0C, 0x0E-0x19 and 0x7F
 //      (tab 0x09, LF 0x0A and CR 0x0D are left alone);
 //   2. an ASCII byte followed by stray continuation bytes (the ASCII byte
 //      is part of the match, so it is replaced as well);
 //   3. lead bytes 0xC0/0xC1 (overly long 2-byte forms) and 0xF0-0xFF
 //      (sequences for U+10000 and above, plus invalid lead bytes),
 //      together with any continuation bytes that follow;
 //   4. a 2-byte lead (0xC2-0xDF) followed by no continuation byte,
 //      or by two or more;
 //   5. a 3-byte lead (0xE0-0xEF) followed by fewer than two continuation
 //      bytes, or by three or more.
 // The hex escapes need their backslashes and the literals need straight
 // single quotes; single-quoted PHP strings pass \xNN through to PCRE.
 // NOTE(review): \x10 is already inside \x0E-\x19, and 0x1A-0x1F are not
 // matched; \x0E-\x1F may have been intended - confirm before changing.
 $some_string = preg_replace(
     '/'
     . '[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]'
     . '|[\x00-\x7F][\x80-\xBF]+'
     . '|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*'
     . '|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})'
     . '|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|'
     . '(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})'
     . '/S',
     '?',
     $some_string
 );
// Replace two kinds of structurally complete but disallowed 3-byte
// sequences with a single '?':
//   - 0xE0 followed by 0x80-0x9F and one more continuation byte
//     (overly long 3-byte encodings);
//   - 0xED followed by 0xA0-0xBF and one more continuation byte
//     (UTF-16 surrogates, U+D800-U+DFFF).
// The pattern has no /u modifier, so it is matched byte by byte. The hex
// escapes need their backslashes and the literals need straight single
// quotes; single-quoted PHP strings pass \xNN through to PCRE.
$some_string = preg_replace(
    '/'
    . '\xE0[\x80-\x9F][\x80-\xBF]'
    . '|\xED[\xA0-\xBF][\x80-\xBF]'
    . '/S',
    '?',
    $some_string
);
 [/sourcecode]
Via Remove non-UTF8 characters from string with PHP « Magp.ie.