00001 <?php
00002
00003
/**
 * Status code: the Unicode check found a configuration problem.
 */
define('UNICODE_ERROR', -1);

/**
 * Status code: mbstring is not available; Unicode operations are emulated.
 */
define('UNICODE_SINGLEBYTE', 0);

/**
 * Status code: mbstring is available and correctly configured.
 */
define('UNICODE_MULTIBYTE', 1);
00019
/**
 * Runs the Unicode support check and records the result globally.
 *
 * Stores the status code returned by _unicode_check() (one of the UNICODE_*
 * constants) in $GLOBALS['multibyte']; the accompanying message is discarded.
 */
function unicode_check() {
  $result = _unicode_check();
  $GLOBALS['multibyte'] = $result[0];
}
00026
/**
 * Performs the checks for PHP Unicode support and configures mbstring.
 *
 * Sets the LC_CTYPE locale to 'C' and, when mbstring is usable, sets its
 * internal encoding to UTF-8 and its language to 'uni'.
 *
 * @return
 *   An array of two elements: a UNICODE_* status constant, and a translated
 *   message describing the problem (an empty string when mbstring is fully
 *   usable).
 */
function _unicode_check() {
  // Translation callback as returned by get_t().
  $translate = get_t();
  // Every mbstring-related message links to the same manual page.
  $mbstring_link = array('@url' => 'http://www.php.net/mbstring');

  // Force the "C" character-type locale.
  setlocale(LC_CTYPE, 'C');

  // In UTF-8 mode the range à-á must not contain â; a match is reported as
  // an outdated PCRE library.
  if (preg_match('/[à-á]/u', 'â')) {
    $message = $translate('The PCRE library in your PHP installation is outdated. This will cause problems when handling Unicode text. If you are running PHP 4.3.3 or higher, make sure you are using the PCRE library supplied by PHP. Please refer to the <a href="@url">PHP PCRE documentation</a> for more information.', array('@url' => 'http://www.php.net/pcre'));
    return array(UNICODE_ERROR, $message);
  }

  // Without mbstring only the emulated string functions are available.
  if (!function_exists('mb_strlen')) {
    $message = $translate('Operations on Unicode strings are emulated on a best-effort basis. Install the <a href="@url">PHP mbstring extension</a> for improved Unicode support.', $mbstring_link);
    return array(UNICODE_SINGLEBYTE, $message);
  }

  // mbstring is present: reject the settings that rewrite strings, input or
  // output behind the application's back.
  if (ini_get('mbstring.func_overload') != 0) {
    $message = $translate('Multibyte string function overloading in PHP is active and must be disabled. Check the php.ini <em>mbstring.func_overload</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $mbstring_link);
    return array(UNICODE_ERROR, $message);
  }
  if (ini_get('mbstring.encoding_translation') != 0) {
    $message = $translate('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.encoding_translation</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $mbstring_link);
    return array(UNICODE_ERROR, $message);
  }
  if (ini_get('mbstring.http_input') != 'pass') {
    $message = $translate('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_input</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $mbstring_link);
    return array(UNICODE_ERROR, $message);
  }
  if (ini_get('mbstring.http_output') != 'pass') {
    $message = $translate('Multibyte string output conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_output</em> setting. Please refer to the <a href="@url">PHP mbstring documentation</a> for more information.', $mbstring_link);
    return array(UNICODE_ERROR, $message);
  }

  // All checks passed: make mbstring operate on UTF-8.
  mb_internal_encoding('utf-8');
  mb_language('uni');
  return array(UNICODE_MULTIBYTE, '');
}
00075
/**
 * Builds the status-report entry describing the Unicode library in use.
 *
 * @return
 *   An array with a single 'unicode' key whose value holds 'title', 'value',
 *   optionally 'description' (only when _unicode_check() returned a message),
 *   and 'severity' (one of the REQUIREMENT_* constants).
 */
function unicode_requirements() {
  // Translation callback as returned by get_t().
  $translate = get_t();

  // Human-readable label for each status code.
  $labels = array(
    UNICODE_SINGLEBYTE => $translate('Standard PHP'),
    UNICODE_MULTIBYTE => $translate('PHP Mbstring Extension'),
    UNICODE_ERROR => $translate('Error'),
  );
  // Requirement severity for each status code.
  $levels = array(
    UNICODE_SINGLEBYTE => REQUIREMENT_WARNING,
    UNICODE_MULTIBYTE => REQUIREMENT_OK,
    UNICODE_ERROR => REQUIREMENT_ERROR,
  );

  $check = _unicode_check();
  $status = $check[0];
  $message = $check[1];

  $entry = array(
    'title' => $translate('Unicode library'),
    'value' => $labels[$status],
  );
  if ($message) {
    $entry['description'] = $message;
  }
  $entry['severity'] = $levels[$status];

  return array('unicode' => $entry);
}
00107
/**
 * Prepares a new XML parser for the given document.
 *
 * Strips a leading UTF-8 byte order mark, reads the encoding from the XML
 * declaration, and converts the data to UTF-8 when the declared encoding is
 * not one of the three the PHP XML parser accepts natively.
 *
 * The POSIX ereg()/ereg_replace() functions were removed in PHP 7.0, so the
 * XML declaration is now inspected with the equivalent PCRE calls.
 *
 * @param &$data
 *   The XML data to be parsed. Modified in place: a leading BOM is removed,
 *   and on conversion the data is replaced by its UTF-8 version with the
 *   encoding declaration rewritten to "utf-8".
 *
 * @return
 *   An XML parser, or 0 when the data could not be converted to UTF-8.
 */
function drupal_xml_parser_create(&$data) {
  // Assume UTF-8 unless the document says otherwise.
  $encoding = 'utf-8';
  $bom = FALSE;

  // Strip a UTF-8 byte order mark; its presence settles the encoding.
  if (!strncmp($data, "\xEF\xBB\xBF", 3)) {
    $bom = TRUE;
    $data = substr($data, 3);
  }

  // Without a BOM, trust the encoding named in the XML declaration.
  if (!$bom && preg_match('/^<\?xml[^>]+encoding="([^"]+)"/', $data, $match)) {
    $encoding = $match[1];
  }

  // Encodings the PHP XML parser handles itself; anything else is converted.
  $php_supported = array('utf-8', 'iso-8859-1', 'us-ascii');
  if (!in_array(strtolower($encoding), $php_supported)) {
    $out = drupal_convert_to_utf8($data, $encoding);
    if ($out !== FALSE) {
      $encoding = 'utf-8';
      // Keep the declaration consistent with the converted data.
      $data = preg_replace('/^(<\?xml[^>]+encoding)="([^"]+)"/', '\\1="utf-8"', $out);
    }
    else {
      watchdog('php', 'Could not convert XML encoding %s to UTF-8.', array('%s' => $encoding), WATCHDOG_WARNING);
      return 0;
    }
  }

  $xml_parser = xml_parser_create($encoding);
  xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8');
  return $xml_parser;
}
00160
/**
 * Converts data to UTF-8 with the first available conversion library.
 *
 * Tries iconv, then mbstring, then GNU recode. Errors raised by the
 * conversion call itself are suppressed.
 *
 * @param $data
 *   The data to be converted.
 * @param $encoding
 *   The encoding that the data is in.
 *
 * @return
 *   Whatever the conversion function returned, or FALSE (after logging a
 *   watchdog error) when none of the three libraries is available.
 */
function drupal_convert_to_utf8($data, $encoding) {
  if (function_exists('iconv')) {
    return @iconv($encoding, 'utf-8', $data);
  }
  if (function_exists('mb_convert_encoding')) {
    return @mb_convert_encoding($data, 'utf-8', $encoding);
  }
  if (function_exists('recode_string')) {
    return @recode_string($encoding . '..utf-8', $data);
  }

  // No conversion library is installed.
  watchdog('php', 'Unsupported encoding %s. Please install iconv, GNU recode or mbstring for PHP.', array('%s' => $encoding), WATCHDOG_ERROR);
  return FALSE;
}
00190
/**
 * Truncates a UTF-8 string to at most $len bytes without splitting a character.
 *
 * If the cut would fall inside a multi-byte sequence, the string is shortened
 * further so that it ends before that sequence.
 *
 * When no lead byte exists at or before the cut (the data is not valid
 * UTF-8), the backward scan previously ran off the start of the string and
 * substr() was called with a length of -1, returning more than $len bytes.
 * A plain byte cut is returned in that case instead.
 *
 * @param $string
 *   The string to truncate.
 * @param $len
 *   An upper limit on the returned string length, in bytes. Expected to be
 *   non-negative.
 *
 * @return
 *   The truncated string.
 */
function drupal_truncate_bytes($string, $len) {
  if (strlen($string) <= $len) {
    return $string;
  }

  // The byte just after the cut starts a new character (ASCII or a UTF-8
  // lead byte), so the cut is already on a character boundary.
  $next = ord($string[$len]);
  if ($next < 0x80 || $next >= 0xC0) {
    return substr($string, 0, $len);
  }

  // The cut is inside a multi-byte sequence: walk back to its lead byte and
  // cut just before it.
  for ($cut = $len - 1; $cut >= 0; $cut--) {
    $byte = ord($string[$cut]);
    if ($byte < 0x80 || $byte >= 0xC0) {
      return substr($string, 0, $cut);
    }
  }

  // Only continuation bytes precede the cut; there is no character boundary
  // to respect, so honour the byte limit.
  return substr($string, 0, $len);
}
00219
/**
 * Truncates a UTF-8 string to a number of characters.
 *
 * @param $string
 *   The string to truncate.
 * @param $len
 *   An upper limit on the returned string length, in characters.
 * @param $wordsafe
 *   When TRUE, cut at the last space inside the first $len + 1 characters if
 *   there is one (a space at position 0 does not count); otherwise cut at
 *   $len characters.
 * @param $dots
 *   When TRUE, append ' ...' to the truncated string; four characters are
 *   reserved for it out of $len.
 *
 * @return
 *   The original string if it is no longer than $len characters, otherwise
 *   the truncated string.
 */
function truncate_utf8($string, $len, $wordsafe = FALSE, $dots = FALSE) {
  // Nothing to do when the string already fits.
  if (drupal_strlen($string) <= $len) {
    return $string;
  }

  $suffix = '';
  if ($dots) {
    // Leave room for the ' ...' suffix.
    $len -= 4;
    $suffix = ' ...';
  }

  if ($wordsafe) {
    // Look one character past the limit so a space right at the limit counts.
    $string = drupal_substr($string, 0, $len + 1);
    $last_space = strrpos($string, ' ');
    if ($last_space) {
      // A space is a single byte, so a byte-based cut is safe here.
      return substr($string, 0, $last_space) . $suffix;
    }
  }

  return drupal_substr($string, 0, $len) . $suffix;
}
00263
/**
 * Encodes a MIME header value that contains non-printable-ASCII characters.
 *
 * Strings consisting only of bytes 0x20-0x7E are returned unchanged.
 * Anything else is split into chunks of at most 47 bytes on character
 * boundaries, and each chunk is emitted as a base64 "=?UTF-8?B?...?="
 * encoded word, the words being separated by a newline and a space.
 *
 * drupal_truncate_bytes() can return an empty chunk for malformed input (a
 * lead byte followed by 47 or more continuation bytes). Previously that
 * consumed zero bytes per iteration and the loop never ended; such input is
 * now cut at the byte limit so every iteration makes progress.
 *
 * @param $string
 *   The header value to encode.
 *
 * @return
 *   The encoded header value, or $string itself when no encoding is needed.
 */
function mime_header_encode($string) {
  if (preg_match('/[^\x20-\x7E]/', $string)) {
    // Maximum number of raw bytes per encoded word.
    $chunk_size = 47;
    $len = strlen($string);
    $output = '';
    while ($len > 0) {
      $chunk = drupal_truncate_bytes($string, $chunk_size);
      $c = strlen($chunk);
      if ($c == 0) {
        // No character boundary fits in the chunk; fall back to a raw byte
        // cut rather than looping forever.
        $chunk = substr($string, 0, $chunk_size);
        $c = strlen($chunk);
      }
      $output .= ' =?UTF-8?B?' . base64_encode($chunk) . "?=\n";
      $string = substr($string, $c);
      $len -= $c;
    }
    return trim($output);
  }
  return $string;
}
00295
/**
 * Decodes the MIME encoded words ("=?charset?Q|B?data?=") in a header value.
 *
 * @param $header
 *   The header value to decode.
 *
 * @return
 *   The header with every encoded word replaced by the value returned from
 *   _mime_header_decode().
 */
function mime_header_decode($header) {
  // One encoded word: charset, encoding (Q or B), and data.
  $encoded_word = '=\?([^?]+)\?(Q|B)\?([^?]+|\?(?!=))\?=';

  // First pass: encoded words that are followed by whitespace and another
  // encoded word. The whitespace is part of the match, so it is dropped.
  $header = preg_replace_callback('/' . $encoded_word . '\s+(?==\?)/', '_mime_header_decode', $header);

  // Second pass: every remaining encoded word.
  return preg_replace_callback('/' . $encoded_word . '/', '_mime_header_decode', $header);
}
00305
/**
 * Callback for mime_header_decode(): decodes a single encoded word.
 *
 * In the "Q" encoding an underscore stands for a space, while a literal
 * underscore is written as "=5F". The underscores are therefore translated
 * before the quoted-printable decoding; doing it afterwards (as before)
 * also turned every decoded "=5F" into a space.
 *
 * @param $matches
 *   Regular expression match: [1] is the charset, [2] the encoding ('B' for
 *   base64, anything else is treated as 'Q'), and [3] the encoded data.
 *
 * @return
 *   The decoded data, converted to UTF-8 with drupal_convert_to_utf8() when
 *   the charset is not UTF-8.
 */
function _mime_header_decode($matches) {
  if ($matches[2] == 'B') {
    $data = base64_decode($matches[3]);
  }
  else {
    $data = quoted_printable_decode(str_replace('_', ' ', $matches[3]));
  }
  if (strtolower($matches[1]) != 'utf-8') {
    $data = drupal_convert_to_utf8($data, $matches[1]);
  }
  return $data;
}
00320
/**
 * Decodes HTML entities in a string to their UTF-8 characters.
 *
 * Named entities are looked up in a table built once per request; numeric
 * entities are handled by _decode_entities(). All entities are found in a
 * single pass, so double-escaped entities are only decoded once.
 *
 * Changes from the previous implementation:
 * - The "&apos;" table key is written out again (it had been reduced to a
 *   bare quote character, which is a syntax error).
 * - The "/e" (eval) modifier of preg_replace() was removed in PHP 7.0, so
 *   the matches are now collected with preg_match_all() and passed to
 *   _decode_entities() directly.
 * - get_html_translation_table() returns UTF-8 by default since PHP 5.4,
 *   which utf8_encode() would then encode a second time. ISO-8859-1 is
 *   requested explicitly wherever the encoding parameter exists.
 *
 * @param $text
 *   The text to decode entities in.
 * @param $exclude
 *   An array of characters which should not be decoded, for example
 *   array('<', '&', '"').
 *
 * @return
 *   The input $text, with all HTML entities decoded once.
 */
function decode_entities($text, $exclude = array()) {
  static $table;

  // Build the table of named entities on first use.
  if (!isset($table)) {
    // The encoding parameter was added in PHP 5.3.4; older versions always
    // return ISO-8859-1.
    if (version_compare(PHP_VERSION, '5.3.4', '>=')) {
      $named = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1');
    }
    else {
      $named = get_html_translation_table(HTML_ENTITIES);
    }
    $table = array_flip($named);
    // The table holds ISO-8859-1 characters; convert them to UTF-8.
    $table = array_map('utf8_encode', $table);
    // Add the apostrophe, which the table above does not contain.
    $table['&apos;'] = "'";
  }
  // Drop the table entries whose character is excluded.
  $newtable = array_diff($table, $exclude);

  // Find every entity: optional "#" or "#x" prefix, then the name or number.
  $pattern = '/&(#x?)?([A-Za-z0-9]+);/';
  if (!preg_match_all($pattern, $text, $entities, PREG_SET_ORDER)) {
    return $text;
  }

  // Interleave the text between entities with the decoded entities.
  $pieces = preg_split($pattern, $text);
  $output = $pieces[0];
  foreach ($entities as $index => $entity) {
    $output .= _decode_entities($entity[1], $entity[2], $entity[0], $newtable, $exclude);
    $output .= $pieces[$index + 1];
  }
  return $output;
}
00347
/**
 * Helper for decode_entities(): decodes a single entity.
 *
 * Changes from the previous implementation:
 * - Numeric references whose digits are not valid for their base (for
 *   example "&#abc;" or "&#xZZ;") are returned unchanged instead of being
 *   passed to chr().
 * - Code points of 0x200000 and above are returned unchanged; previously
 *   the result variable was never assigned for them.
 *
 * @param $prefix
 *   '' for a named entity, '#' for a decimal reference, '#x' for a
 *   hexadecimal reference.
 * @param $codepoint
 *   The entity name or the digits of the numeric reference.
 * @param $original
 *   The complete entity as found in the text, for example '&amp;'.
 * @param &$table
 *   Map of named entities to their UTF-8 characters.
 * @param &$exclude
 *   Array of characters that must not be decoded.
 *
 * @return
 *   The decoded UTF-8 character, or $original when the entity is unknown,
 *   malformed, out of range, or decodes to an excluded character.
 */
function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
  // Named entity: a plain table lookup.
  if (!$prefix) {
    return isset($table[$original]) ? $table[$original] : $original;
  }

  // Numeric entity: validate the digits and turn them into a number.
  if ($prefix == '#x') {
    if (!preg_match('/^[0-9A-Fa-f]+$/D', $codepoint)) {
      return $original;
    }
    $value = hexdec($codepoint);
  }
  else {
    if (!preg_match('/^[0-9]+$/D', $codepoint)) {
      return $original;
    }
    // Go through float so that very long digit strings cannot wrap around.
    $value = (float) $codepoint;
  }

  // Beyond what a four-byte sequence can hold.
  if ($value >= 0x200000) {
    return $original;
  }
  $codepoint = (int) $value;

  // Encode the code point as UTF-8.
  if ($codepoint < 0x80) {
    $str = chr($codepoint);
  }
  else if ($codepoint < 0x800) {
    $str = chr(0xC0 | ($codepoint >> 6))
         . chr(0x80 | ($codepoint & 0x3F));
  }
  else if ($codepoint < 0x10000) {
    $str = chr(0xE0 | ($codepoint >> 12))
         . chr(0x80 | (($codepoint >> 6) & 0x3F))
         . chr(0x80 | ($codepoint & 0x3F));
  }
  else {
    $str = chr(0xF0 | ($codepoint >> 18))
         . chr(0x80 | (($codepoint >> 12) & 0x3F))
         . chr(0x80 | (($codepoint >> 6) & 0x3F))
         . chr(0x80 | ($codepoint & 0x3F));
  }

  // Leave excluded characters encoded.
  if (in_array($str, $exclude)) {
    return $original;
  }
  return $str;
}
00396
/**
 * Counts the number of characters in a UTF-8 string.
 *
 * Uses mb_strlen() when the global $multibyte status is UNICODE_MULTIBYTE.
 * Otherwise every byte that is not a UTF-8 continuation byte (0x80-0xBF) is
 * counted as one character.
 *
 * @param $text
 *   The string to measure.
 *
 * @return
 *   The length of $text in characters.
 */
function drupal_strlen($text) {
  global $multibyte;
  if ($multibyte != UNICODE_MULTIBYTE) {
    // Remove the continuation bytes; one byte per character is left.
    $without_continuations = preg_replace("/[\x80-\xBF]/", '', $text);
    return strlen($without_continuations);
  }
  return mb_strlen($text);
}
00411
/**
 * Converts a UTF-8 string to uppercase.
 *
 * Uses mb_strtoupper() when the global $multibyte status is
 * UNICODE_MULTIBYTE. Otherwise only ASCII letters and the two-byte
 * sequences C3 A0-B6 and C3 B8-BE (U+00E0-U+00F6 and U+00F8-U+00FE) are
 * converted.
 *
 * @param $text
 *   The string to convert.
 *
 * @return
 *   The uppercased string.
 */
function drupal_strtoupper($text) {
  global $multibyte;
  if ($multibyte != UNICODE_MULTIBYTE) {
    // ASCII letters first.
    $upper = strtoupper($text);
    // Then flip the case bit of the matching two-byte sequences.
    return preg_replace_callback('/\xC3[\xA0-\xB6\xB8-\xBE]/', '_unicode_caseflip', $upper);
  }
  return mb_strtoupper($text);
}
00428
/**
 * Converts a UTF-8 string to lowercase.
 *
 * Uses mb_strtolower() when the global $multibyte status is
 * UNICODE_MULTIBYTE. Otherwise only ASCII letters and the two-byte
 * sequences C3 80-96 and C3 98-9E (U+00C0-U+00D6 and U+00D8-U+00DE) are
 * converted.
 *
 * @param $text
 *   The string to convert.
 *
 * @return
 *   The lowercased string.
 */
function drupal_strtolower($text) {
  global $multibyte;
  if ($multibyte != UNICODE_MULTIBYTE) {
    // ASCII letters first.
    $lower = strtolower($text);
    // Then flip the case bit of the matching two-byte sequences.
    return preg_replace_callback('/\xC3[\x80-\x96\x98-\x9E]/', '_unicode_caseflip', $lower);
  }
  return mb_strtolower($text);
}
00445
/**
 * Callback for drupal_strtoupper() and drupal_strtolower(): flips the case
 * of a two-byte UTF-8 sequence.
 *
 * @param $matches
 *   Regular expression match; $matches[0] is the two-byte sequence.
 *
 * @return
 *   The first byte unchanged, followed by the second byte with bit 0x20
 *   toggled.
 */
function _unicode_caseflip($matches) {
  $sequence = $matches[0];
  $flipped = ord($sequence[1]) ^ 32;
  return $sequence[0] . chr($flipped);
}
00453
/**
 * Uppercases the first character of a UTF-8 string.
 *
 * @param $text
 *   The string to convert.
 *
 * @return
 *   The string with its first character passed through drupal_strtoupper()
 *   and the rest left as is.
 */
function drupal_ucfirst($text) {
  $initial = drupal_substr($text, 0, 1);
  $remainder = drupal_substr($text, 1);
  return drupal_strtoupper($initial) . $remainder;
}
00461
/**
 * Cuts a piece out of a UTF-8 string, counting in characters.
 *
 * Uses mb_substr() when the global $multibyte status is UNICODE_MULTIBYTE.
 * Otherwise the byte offsets are found by scanning the string and counting
 * every byte that is not a UTF-8 continuation byte (0x80-0xBF) as the start
 * of a character.
 *
 * Changes from the previous emulation:
 * - The forward scans no longer read the byte at offset strlen($text),
 *   which raised an undefined string offset notice/warning whenever a scan
 *   reached the end of the string.
 * - A $length of 0 returns an empty string, as mb_substr() does; it used to
 *   return the single byte at the start offset.
 *
 * @param $text
 *   The input string.
 * @param $start
 *   The position at which to start reading, in characters. A negative value
 *   counts from the end of the string.
 * @param $length
 *   The number of characters to read. A negative value stops that many
 *   characters before the end of the string; NULL reads to the end.
 *
 * @return
 *   The requested part of $text.
 */
function drupal_substr($text, $start, $length = NULL) {
  global $multibyte;
  if ($multibyte == UNICODE_MULTIBYTE) {
    return $length === NULL ? mb_substr($text, $start) : mb_substr($text, $start, $length);
  }
  else {
    $strlen = strlen($text);

    // Find the starting byte offset.
    $bytes = 0;
    if ($start > 0) {
      // Walk forward until $start character starts have been passed, or the
      // end of the string is reached.
      $bytes = -1; $chars = -1;
      while ($bytes < $strlen && $chars < $start) {
        $bytes++;
        if ($bytes == $strlen) {
          // $start lies at or beyond the end of the string.
          break;
        }
        $c = ord($text[$bytes]);
        if ($c < 0x80 || $c >= 0xC0) {
          $chars++;
        }
      }
    }
    else if ($start < 0) {
      // Walk backward from the end until abs($start) character starts have
      // been found.
      $start = abs($start);
      $bytes = $strlen; $chars = 0;
      while ($bytes > 0 && $chars < $start) {
        $bytes--;
        $c = ord($text[$bytes]);
        if ($c < 0x80 || $c >= 0xC0) {
          $chars++;
        }
      }
    }
    $istart = $bytes;

    // Find the ending byte offset (inclusive).
    if ($length === NULL) {
      $bytes = $strlen - 1;
    }
    else if ($length > 0) {
      // Walk forward from the start offset to the first byte of the
      // character following the requested range, then step back one byte.
      $bytes = $istart; $chars = 0;
      while ($bytes < $strlen && $chars < $length) {
        $bytes++;
        if ($bytes == $strlen) {
          // Fewer than $length characters remain; read to the end.
          break;
        }
        $c = ord($text[$bytes]);
        if ($c < 0x80 || $c >= 0xC0) {
          $chars++;
        }
      }
      $bytes--;
    }
    else if ($length < 0) {
      // Walk backward from the end past abs($length) character starts.
      $length = abs($length);
      $bytes = $strlen - 1; $chars = 0;
      while ($bytes >= 0 && $chars < $length) {
        $c = ord($text[$bytes]);
        if ($c < 0x80 || $c >= 0xC0) {
          $chars++;
        }
        $bytes--;
      }
    }
    else {
      // Zero characters requested.
      return '';
    }
    $iend = $bytes;

    return substr($text, $istart, max(0, $iend - $istart + 1));
  }
}
00541
00542