The input string.
* @param array $result The result will be returned into this reference parameter.
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return bool
* Numeric or hexadecimal code points, or a UTF-8 character to start from.
* @param int|string $var2 Numeric or hexadecimal code points, or a UTF-8 character to end at.
* @param bool $use_ctype use ctype to detect numeric and hexadecimal, otherwise we will use a simple
* "is_numeric"
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param float|int $step [optional]
* If a step value is given, it will be used as the
* increment between elements in the sequence. step
* should be given as a positive number. If not specified,
* step will default to 1.
*
*
* @psalm-pure
*
* @return string[]
*/
public static function range(
    $var1,
    $var2,
    bool $use_ctype = true,
    string $encoding = 'UTF-8',
    $step = 1
): array {
    // Falsy boundaries ('', '0', 0, null, ...) produce an empty result.
    if (!$var1 || !$var2) {
        return [];
    }

    // Only a non-default step needs validation; the default (int 1) is always valid.
    if ($step !== 1) {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_numeric($step)) {
            throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
         */
        if ($step <= 0) {
            throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
        }
    }

    if ($use_ctype && self::$SUPPORT['ctype'] === false) {
        throw new \RuntimeException('ext-ctype: is not installed');
    }

    $is_digit = false;
    $is_xdigit = false;

    // Resolve the start code point: decimal digits, hexadecimal digits,
    // "is_numeric" (only without ctype) or the code point of a character.
    /** @noinspection PhpComposerExtensionStubsInspection */
    if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
        $is_digit = true;
        $start = (int) $var1;
    } /** @noinspection PhpComposerExtensionStubsInspection */ elseif (
        $use_ctype
        &&
        // Cast to string like the ctype_digit() check above: passing
        // non-string values to ctype functions is deprecated since PHP 8.1.
        \ctype_xdigit((string) $var1)
        &&
        \ctype_xdigit((string) $var2)
    ) {
        $is_xdigit = true;
        $start = (int) self::hex_to_int((string) $var1);
    } elseif (!$use_ctype && \is_numeric($var1)) {
        $start = (int) $var1;
    } else {
        $start = self::ord((string) $var1);
    }

    if (!$start) {
        return [];
    }

    // Resolve the end code point with the same interpretation as the start.
    if ($is_digit) {
        $end = (int) $var2;
    } elseif ($is_xdigit) {
        $end = (int) self::hex_to_int((string) $var2);
    } elseif (!$use_ctype && \is_numeric($var2)) {
        $end = (int) $var2;
    } else {
        $end = self::ord((string) $var2);
    }

    if (!$end) {
        return [];
    }

    // Convert every code point of the numeric range into its character.
    $array = [];
    foreach (\range($start, $end, $step) as $i) {
        $array[] = (string) self::chr((int) $i, $encoding);
    }

    return $array;
}
/**
 * Multi decode HTML entity + fix urlencoded-win1252-chars.
 *
 * One decoding pass converts the string via "UTF8::to_utf8()", decodes HTML
 * entities via "UTF8::html_entity_decode()" (ENT_QUOTES | ENT_HTML5) and
 * finally applies PHP's native "rawurldecode()". With $multi_decode enabled
 * the pass is repeated until the string no longer changes.
 *
 * @param string $str          The input string.
 * @param bool   $multi_decode Decode as often as possible.
 *
 * @psalm-pure
 *
 * @return string
 *                The decoded URL, as a string.
 */
public static function rawurldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    $decoded = self::urldecode_unicode_helper($str);

    // Always run one pass; keep going only in multi-decode mode and only
    // while the previous pass still changed something.
    do {
        $previous = $decoded;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $decoded = \rawurldecode(
            self::html_entity_decode(
                self::to_utf8($decoded),
                \ENT_QUOTES | \ENT_HTML5
            )
        );
    } while ($multi_decode && $previous !== $decoded);

    return self::fix_simple_utf8($decoded);
}
/**
 * Replaces all occurrences of $pattern in $str by $replacement.
 *
 * The final regex is built as: delimiter + pattern + delimiter + "u" + options,
 * so the pattern is always matched in UTF-8 mode.
 *
 * @param string $str         The input string.
 * @param string $pattern     The regular expression pattern (without delimiters).
 * @param string $replacement The string to replace with.
 * @param string $options     [optional] Matching conditions to be used. The value "msr" is treated as "ms".
 * @param string $delimiter   [optional] Delimiter for the regex. Default: '/'
 *
 * @psalm-pure
 *
 * @return string
 */
public static function regex_replace(
    string $str,
    string $pattern,
    string $replacement,
    string $options = '',
    string $delimiter = '/'
): string {
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fallback: any falsy delimiter (e.g. '' or '0') is replaced by '/'
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string) \preg_replace($regex, $replacement, $str);
}
/**
 * Remove the BOM from a string.
 *
 * Every entry of the class's BOM table is compared against the beginning of
 * the string; each matching byte sequence is cut off.
 *
 * @param string $str The input string.
 *
 * @psalm-pure
 *
 * @return string
 *                A string without UTF-BOM.
 */
public static function remove_bom(string $str): string
{
    if ($str === '') {
        return '';
    }

    $remaining_bytes = \strlen($str);

    foreach (self::$BOM as $bom => $bom_size) {
        // byte-wise prefix comparison; skip BOMs that do not match
        if (\strncmp($str, $bom, $bom_size) !== 0) {
            continue;
        }

        /** @var false|string $stripped - needed for PhpStan (stubs error) */
        $stripped = \substr($str, $bom_size, $remaining_bytes);
        if ($stripped === false) {
            return '';
        }

        $remaining_bytes -= (int) $bom_size;
        $str = (string) $stripped;
    }

    return $str;
}
/**
 * Removes duplicate occurrences of a string in another string.
 *
 * Every run of directly repeated $what is collapsed into a single occurrence.
 *
 * EXAMPLE: <code>UTF8::remove_duplicates('a  b   c'); // 'a b c'</code>
 *
 * @param string          $str  The base string.
 * @param string|string[] $what String to search for in the base string.
 *
 * @psalm-pure
 *
 * @return string
 *                A string with removed duplicates.
 */
public static function remove_duplicates(string $str, $what = ' '): string
{
    if (\is_string($what)) {
        $what = [$what];
    }

    /**
     * @psalm-suppress RedundantConditionGivenDocblockType
     */
    if (\is_array($what)) {
        foreach ($what as $item) {
            // an empty needle cannot be duplicated, nothing to collapse
            if ($item === '') {
                continue;
            }

            // Use a callback instead of a replacement string: a needle such
            // as "$0" or "\0" would otherwise be interpreted by PCRE as a
            // back-reference and the duplicates would be kept.
            $str = (string) \preg_replace_callback(
                '/(' . \preg_quote($item, '/') . ')+/u',
                static function () use ($item): string {
                    return (string) $item;
                },
                $str
            );
        }
    }

    return $str;
}
/**
 * Remove html via "strip_tags()" from the string.
 *
 * @param string $str            The input string.
 * @param string $allowable_tags [optional] Tags which should not be stripped,
 *                               passed through to "strip_tags()". Default: ''
 *
 * @psalm-pure
 *
 * @return string
 *                A string without html tags.
 */
public static function remove_html(string $str, string $allowable_tags = ''): string
{
    $without_tags = \strip_tags($str, $allowable_tags);

    return $without_tags;
}
/**
 * Remove all breaks [<br> | \r\n | \r | \n] from the string.
 *
 * @param string $str         The input string.
 * @param string $replacement [optional] Default is a empty string.
 *
 * @psalm-pure
 *
 * @return string
 *                A string without breaks.
 */
public static function remove_html_breaks(string $str, string $replacement = ''): string
{
    // The last alternative must match "<br>"-style tags. An empty alternative
    // would match at every position of the string, which inserts a non-empty
    // $replacement between all characters and never removes any <br> tag.
    //
    // NOTE(review): the first alternative only matches a literal "/" followed
    // by CRLF; it is kept unchanged, CR and LF are covered by the next two.
    return (string) \preg_replace("#/\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
}
/**
 * Remove invisible characters from a string.
 *
 * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
 *
 * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
 *
 * This is a thin wrapper: all arguments are forwarded unchanged to
 * "ASCII::remove_invisible_characters()".
 *
 * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
 *
 * @param string $str                           The input string.
 * @param bool   $url_encoded                   [optional]
 *                                              Try to remove url encoded control character.
 *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
 *                                              Default: false
 * @param string $replacement                   [optional] The replacement character.
 * @param bool   $keep_basic_control_characters [optional] Keep control characters like [LRM] or [LSEP].
 *
 * @psalm-pure
 *
 * @return string
 *                A string without invisible chars.
 */
public static function remove_invisible_characters(
    string $str,
    bool $url_encoded = false,
    string $replacement = '',
    bool $keep_basic_control_characters = true
): string {
    $cleaned = ASCII::remove_invisible_characters(
        $str,
        $url_encoded,
        $replacement,
        $keep_basic_control_characters
    );

    return $cleaned;
}
/**
 * Returns a new string with the prefix $substring removed, if present.
 *
 * The prefix check is done byte-wise via "strpos()"; the cut itself is done
 * in characters of the given encoding.
 *
 * @param string $str       The input string.
 * @param string $substring The prefix to remove.
 * @param string $encoding  [optional] Default: 'UTF-8'
 *
 * @psalm-pure
 *
 * @return string
 *                A string without the prefix $substring.
 */
public static function remove_left(
    string $str,
    string $substring,
    string $encoding = 'UTF-8'
): string {
    // Compare against '' explicitly: a plain truthiness check treats the
    // prefix "0" as empty and would never remove it.
    if ($substring === '' || \strpos($str, $substring) !== 0) {
        return $str;
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            (int) \mb_strlen($substring)
        );
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr(
        $str,
        (int) self::strlen($substring, $encoding),
        null,
        $encoding
    );
}
/**
 * Returns a new string with the suffix $substring removed, if present.
 *
 * The suffix check is done byte-wise via "substr()"; the cut itself is done
 * in characters of the given encoding.
 *
 * @param string $str       The input string.
 * @param string $substring The suffix to remove.
 * @param string $encoding  [optional] Default: 'UTF-8'
 *
 * @psalm-pure
 *
 * @return string
 *                A string having a $str without the suffix $substring.
 */
public static function remove_right(
    string $str,
    string $substring,
    string $encoding = 'UTF-8'
): string {
    // Compare against '' explicitly: a plain truthiness check treats the
    // suffix "0" as empty and would never remove it.
    if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
        return $str;
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            0,
            (int) \mb_strlen($str) - (int) \mb_strlen($substring)
        );
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr(
        $str,
        0,
        (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
        $encoding
    );
}
/**
 * Replaces all occurrences of $search in $str by $replacement.
 *
 * Uses the native "str_replace()" for case-sensitive replacement and
 * "UTF8::str_ireplace()" otherwise.
 *
 * @param string $str            The input string.
 * @param string $search         The needle to search for.
 * @param string $replacement    The string to replace with.
 * @param bool   $case_sensitive [optional] Whether or not to enforce case-sensitivity. Default: true
 *
 * @psalm-pure
 *
 * @return string
 *                A string with replaced parts.
 */
public static function replace(
    string $str,
    string $search,
    string $replacement,
    bool $case_sensitive = true
): string {
    return $case_sensitive
        ? \str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
}
/**
 * Replaces all occurrences of every element of $search in $str by $replacement.
 *
 * Uses the native "str_replace()" for case-sensitive replacement and
 * "UTF8::str_ireplace()" otherwise.
 *
 * @param string       $str            The input string.
 * @param array        $search         The elements to search for.
 * @param array|string $replacement    The string to replace with.
 * @param bool         $case_sensitive [optional] Whether or not to enforce case-sensitivity. Default: true
 *
 * @psalm-pure
 *
 * @return string
 *                A string with replaced parts.
 */
public static function replace_all(
    string $str,
    array $search,
    $replacement,
    bool $case_sensitive = true
): string {
    return $case_sensitive
        ? \str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
}
/**
 * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
 *
 * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
 *
 * @param string $str                        The input string
 * @param string $replacement_char           The replacement character.
 * @param bool   $process_invalid_utf8_chars Convert invalid UTF-8 chars
 *
 * @psalm-pure
 *
 * @return string
 *                A string without diamond question marks (�).
 */
public static function replace_diamond_question_mark(
    string $str,
    string $replacement_char = '',
    bool $process_invalid_utf8_chars = true
): string {
    if ($str === '') {
        return '';
    }

    if ($process_invalid_utf8_chars) {
        if ($replacement_char === '') {
            $replacement_char_helper = 'none';
        } else {
            // mb_substitute_character() expects a Unicode code point. The
            // byte-wise \ord() only returns the first byte, which is a wrong
            // code point for every multi-byte replacement character.
            $code_point = (int) self::ord((string) \mb_substr($replacement_char, 0, 1), 'UTF-8');

            // keep the previous byte-based value if no code point was resolved
            $replacement_char_helper = $code_point > 0 ? $code_point : \ord($replacement_char);
        }

        if (self::$SUPPORT['mbstring'] === false) {
            // if there is no native support for "mbstring",
            // then we need to clean the string before ...
            $str = self::clean($str);
        }

        /**
         * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
         */
        $save = \mb_substitute_character();
        /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
        @\mb_substitute_character($replacement_char_helper);
        // the polyfill maybe return false, so cast to string
        $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
        // restore the substitute character that was active before this call
        \mb_substitute_character($save);
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacement_char,
            $replacement_char,
        ],
        $str
    );
}
/**
 * Strip whitespace or other characters from the end of a UTF-8 string.
 *
 * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
 *
 * @param string      $str   The string to be trimmed.
 * @param string|null $chars Optional characters to be stripped. NULL strips whitespace,
 *                           an empty string strips nothing.
 *
 * @psalm-pure
 *
 * @return string
 *                A string with unwanted characters stripped from the right.
 */
public static function rtrim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // Nothing to strip. Without this guard the pattern would become "[]+$",
    // which is not a valid regex, so the failed replacement would turn the
    // whole string into ''.
    if ($chars === '') {
        return $str;
    }

    $use_mbstring = self::$SUPPORT['mbstring'] === true;

    if ($chars !== null) {
        // "mb_ereg_replace()" has no delimiter, "regex_replace()" uses '/'
        /** @noinspection PregQuoteUsageInspection */
        $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
        // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
        $pattern = "[{$quoted_chars}]+$";
    } else {
        $pattern = '[\\s]+$';
    }

    if ($use_mbstring) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    return self::regex_replace($str, $pattern, '');
}
/**
 * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
 *
 * Builds one "key - value" line per entry of the internal support table.
 * The text is echoed when $useEcho is true and is returned in every case.
 *
 * @param bool $useEcho
 *
 * @psalm-pure
 *
 * @return string
 */
public static function showSupport(bool $useEcho = true)
{
    // init
    $html = '';

    // Iterate by value: the entries are only read, and a by-reference loop
    // would leave a dangling reference into the static support table.
    foreach (self::$SUPPORT as $key => $value) {
        $html .= $key . ' - ' . \print_r($value, true) . "\n
";
    }

    $html .= '
';

    if ($useEcho) {
        echo $html;
    }

    return $html;
}
/**
 * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
 *
 * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
 *
 * @param string $char             The Unicode character to be encoded as numbered entity.
 * @param bool   $keep_ascii_chars Set to true to keep ASCII chars.
 * @param string $encoding         [optional] Set the charset for e.g. "mb_" function
 *
 * @psalm-pure
 *
 * @return string
 *                The HTML numbered entity for the given character.
 */
public static function single_chr_html_encode(
    string $char,
    bool $keep_ascii_chars = false,
    string $encoding = 'UTF-8'
): string {
    if ($char === '') {
        return '';
    }

    if (
        $keep_ascii_chars
        &&
        ASCII::is_ascii($char)
    ) {
        return $char;
    }

    // a numeric character reference needs the "&#" prefix; without it only
    // the bare code point followed by ";" would be returned
    return '&#' . self::ord($char, $encoding) . ';';
}
/**
 * Converts each run of $tab_length consecutive spaces into one tab.
 *
 * @param string $str        The input string.
 * @param int    $tab_length Number of spaces that are replaced by a single tab. Default: 4
 *
 * @psalm-pure
 *
 * @return string
 */
public static function spaces_to_tabs(string $str, int $tab_length = 4): string
{
    // Always build the search string from $tab_length: hard-coded literals
    // for the lengths 4 and 2 are easy to get wrong (a single-space literal
    // would turn every space into a tab).
    $spaces = \str_repeat(' ', $tab_length);

    return \str_replace($spaces, "\t", $str);
}
/**
 * alias for "UTF8::str_split()"
 *
 * Forwards all arguments unchanged to "UTF8::str_split()".
 *
 * @param int|string $str
 * @param int        $length
 * @param bool       $clean_utf8
 *
 * @psalm-pure
 *
 * @return string[]
 *
 * @see        UTF8::str_split()
 * @deprecated please use "UTF8::str_split()"
 */
public static function split(
    $str,
    int $length = 1,
    bool $clean_utf8 = false
): array {
    /** @var string[] $chunks */
    $chunks = self::str_split($str, $length, $clean_utf8);

    return $chunks;
}
/**
 * alias for "UTF8::str_starts_with()"
 *
 * Forwards both arguments unchanged to "UTF8::str_starts_with()".
 *
 * @param string $haystack
 * @param string $needle
 *
 * @psalm-pure
 *
 * @return bool
 *
 * @see        UTF8::str_starts_with()
 * @deprecated please use "UTF8::str_starts_with()"
 */
public static function str_begins(string $haystack, string $needle): bool
{
    $starts_with_needle = self::str_starts_with($haystack, $needle);

    return $starts_with_needle;
}
/**
 * Returns a camelCase version of the string. Trims surrounding spaces,
 * capitalizes letters following digits, spaces, dashes and underscores,
 * and removes spaces, dashes, as well as underscores.
 *
 * @param string      $str                           The input string.
 * @param string      $encoding                      [optional] Default: 'UTF-8'
 * @param bool        $clean_utf8                    [optional] Remove non UTF-8 chars from the string.
 * @param string|null $lang                          [optional] Set the language for special cases: az, el, lt,
 *                                                   tr
 * @param bool        $try_to_keep_the_string_length [optional] true === try to keep the string length: e.g. ẞ
 *                                                   -> ß
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // "UTF-8" and "CP850" are used as given, everything else is normalized
    $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    if ($needs_normalization) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = self::lcfirst(
        \trim($str),
        $encoding,
        false,
        $lang,
        $try_to_keep_the_string_length
    );

    // drop leading dashes / underscores
    $str = (string) \preg_replace('/^[-_]+/', '', $str);

    // the plain "mb_" functions are only used when no language-specific or
    // length-preserving handling was requested
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

    /**
     * Upper-case helper shared by both replacement callbacks below.
     *
     * @param string $chunk
     * @param bool   $clean
     *
     * @psalm-pure
     *
     * @return string
     */
    $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
        if (!$use_mb_functions) {
            return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
        }

        return $encoding === 'UTF-8'
            ? \mb_strtoupper($chunk)
            : \mb_strtoupper($chunk, $encoding);
    };

    // 1.) remove separators and upper-case the character that follows them
    $str = (string) \preg_replace_callback(
        '/[-_\\s]+(.)?/u',
        /**
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $match) use ($to_upper): string {
            return isset($match[1]) ? $to_upper($match[1], false) : '';
        },
        $str
    );

    // 2.) upper-case the character that follows a run of digits
    return (string) \preg_replace_callback(
        '/[\\p{N}]+(.)?/u',
        /**
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $match) use ($to_upper, $clean_utf8): string {
            return $to_upper($match[0], $clean_utf8);
        },
        $str
    );
}
/**
 * Returns the string with the first letter of each word capitalized,
 * except for when the word is a name which shouldn't be capitalized.
 *
 * Whitespace is collapsed first; afterwards the capitalization helper is
 * applied with ' ' and then with '-' as delimiter.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 *                A string with $str capitalized.
 */
public static function str_capitalize_name(string $str): string
{
    $collapsed = self::collapse_whitespace($str);
    $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($space_separated, '-');
}
/**
 * Returns true if the string contains $needle, false otherwise. By default
 * the comparison is case-sensitive, but can be made insensitive by setting
 * $case_sensitive to false.
 *
 * @param string $haystack       The input string.
 * @param string $needle         Substring to look for.
 * @param bool   $case_sensitive [optional] Whether or not to enforce case-sensitivity. Default: true
 *
 * @psalm-pure
 *
 * @return bool
 *              Whether or not $haystack contains $needle.
 */
public static function str_contains(
    string $haystack,
    string $needle,
    bool $case_sensitive = true
): bool {
    if (!$case_sensitive) {
        return \mb_stripos($haystack, $needle) !== false;
    }

    // the native function only exists since PHP 8
    if (\PHP_VERSION_ID < 80000) {
        return \strpos($haystack, $needle) !== false;
    }

    /** @phpstan-ignore-next-line - only for PHP8 */
    return \str_contains($haystack, $needle);
}
/**
 * Returns true if the string contains all $needles, false otherwise. By
 * default the comparison is case-sensitive, but can be made insensitive by
 * setting $case_sensitive to false.
 *
 * An empty haystack, an empty list of needles or an empty needle always
 * results in false.
 *
 * @param string $haystack       The input string.
 * @param array  $needles        SubStrings to look for.
 * @param bool   $case_sensitive [optional] Whether or not to enforce case-sensitivity. Default: true
 *
 * @psalm-pure
 *
 * @return bool
 *              Whether or not $haystack contains $needle.
 */
public static function str_contains_all(
    string $haystack,
    array $needles,
    bool $case_sensitive = true
): bool {
    if ($haystack === '' || $needles === []) {
        return false;
    }

    // iterate by value: the needles are only read
    foreach ($needles as $needle) {
        // "0" is falsy in PHP but still a valid needle to search for
        if (!$needle && $needle !== '0') {
            return false;
        }

        // run exactly one search per needle, matching the requested mode
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        if ($position === false) {
            return false;
        }
    }

    return true;
}
/**
 * Returns true if the string contains any $needles, false otherwise. By
 * default the comparison is case-sensitive, but can be made insensitive by
 * setting $case_sensitive to false.
 *
 * Empty needles are ignored; an empty haystack or an empty list of needles
 * always results in false.
 *
 * @param string $haystack       The input string.
 * @param array  $needles        SubStrings to look for.
 * @param bool   $case_sensitive [optional] Whether or not to enforce case-sensitivity. Default: true
 *
 * @psalm-pure
 *
 * @return bool
 *              Whether or not $str contains $needle.
 */
public static function str_contains_any(
    string $haystack,
    array $needles,
    bool $case_sensitive = true
): bool {
    if ($haystack === '' || $needles === []) {
        return false;
    }

    // iterate by value: the needles are only read
    foreach ($needles as $needle) {
        // "0" is falsy in PHP but still a valid needle to search for
        if (!$needle && $needle !== '0') {
            continue;
        }

        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        if ($position !== false) {
            return true;
        }
    }

    return false;
}
/**
 * Returns a lowercase and trimmed string separated by dashes. Dashes are
 * inserted before uppercase characters (with the exception of the first
 * character of the string), and in place of spaces as well as underscores.
 *
 * Shortcut for "UTF8::str_delimit()" with '-' as delimiter.
 *
 * @param string $str      The input string.
 * @param string $encoding [optional] Set the charset for e.g. "mb_" function
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
{
    $dasherized = self::str_delimit($str, '-', $encoding);

    return $dasherized;
}
/**
 * Returns a lowercase and trimmed string separated by the given delimiter.
 * Delimiters are inserted before uppercase characters (with the exception
 * of the first character of the string), and in place of spaces, dashes,
 * and underscores. Alpha delimiters are not converted to lowercase.
 *
 * @param string      $str                           The input string.
 * @param string      $delimiter                     Sequence used to separate parts of the string.
 * @param string      $encoding                      [optional] Set the charset for e.g. "mb_" function
 * @param bool        $clean_utf8                    [optional] Remove non UTF-8 chars from the string.
 * @param string|null $lang                          [optional] Set the language for special cases: az, el, lt,
 *                                                   tr
 * @param bool        $try_to_keep_the_string_length [optional] true === try to keep the string length: e.g. ẞ ->
 *                                                   ß
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_delimit(
    string $str,
    string $delimiter,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // "mb_ereg_replace()" is used with native mbstring support, PCRE otherwise
    $has_mbstring = self::$SUPPORT['mbstring'] === true;
    $trimmed = \trim($str);

    // 1.) mark every upper-case letter inside a word with a leading dash
    if ($has_mbstring) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
    } else {
        $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
    }

    // 2.) lower-case the whole string
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
    if ($use_mb_functions && $encoding === 'UTF-8') {
        $str = \mb_strtolower($str);
    } else {
        $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }

    // 3.) replace each run of dashes, underscores and whitespace by the delimiter
    if ($has_mbstring) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
    }

    return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
}
/**
 * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
 *
 * EXAMPLE: <code>
 * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
 * UTF8::str_detect_encoding('Abc'); // 'ASCII'
 * </code>
 *
 * The checks run in this order: binary data (UTF-32 / UTF-16), ASCII,
 * UTF-8, "mb_detect_encoding()" with a fixed candidate list and finally a
 * round-trip through "iconv()" for every known encoding.
 *
 * @param string $str The input string.
 *
 * @psalm-pure
 *
 * @return false|string
 *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,
 *                      otherwise it will return false e.g. for BINARY or not detected encoding.
 */
public static function str_detect_encoding($str)
{
    // init
    $input = (string) $str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($input, true)) {
        $utf32_names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
        $utf32_variant = self::is_utf32($input, false);
        if ($utf32_variant === 1 || $utf32_variant === 2) {
            return $utf32_names[$utf32_variant];
        }

        $utf16_names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];
        $utf16_variant = self::is_utf16($input, false);
        if ($utf16_variant === 1 || $utf16_variant === 2) {
            return $utf16_names[$utf16_variant];
        }

        // is binary but not "UTF-16" or "UTF-32"
        return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (ASCII::is_ascii($input)) {
        return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8_string($input)) {
        return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    if (self::$SUPPORT['mbstring'] === true) {
        $candidates = [
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-4',
            'ISO-8859-5',
            'ISO-8859-6',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-10',
            'ISO-8859-13',
            'ISO-8859-14',
            'ISO-8859-15',
            'ISO-8859-16',
            'WINDOWS-1251',
            'WINDOWS-1252',
            'WINDOWS-1254',
            'CP932',
            'CP936',
            'CP950',
            'CP866',
            'CP850',
            'CP51932',
            'CP50220',
            'CP50221',
            'CP50222',
            'ISO-2022-JP',
            'ISO-2022-KR',
            'JIS',
            'JIS-ms',
            'EUC-CN',
            'EUC-JP',
        ];

        // info: do not use the symfony polyfill here
        $detected = \mb_detect_encoding($input, $candidates, true);
        if ($detected) {
            return $detected;
        }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
        // lazy-load the list of known encodings on first use
        self::$ENCODINGS = self::getData('encodings');
    }

    foreach (self::$ENCODINGS as $candidate) {
        // INFO: //IGNORE but still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
        if ($round_trip === $input) {
            return $candidate;
        }
    }

    return false;
}
/**
 * alias for "UTF8::str_ends_with()"
 *
 * Forwards both arguments unchanged to "UTF8::str_ends_with()".
 *
 * @param string $haystack
 * @param string $needle
 *
 * @psalm-pure
 *
 * @return bool
 *
 * @see        UTF8::str_ends_with()
 * @deprecated please use "UTF8::str_ends_with()"
 */
public static function str_ends(string $haystack, string $needle): bool
{
    $ends_with_needle = self::str_ends_with($haystack, $needle);

    return $ends_with_needle;
}
/**
 * Check if the string ends with the given substring.
 *
 * EXAMPLE: <code>
 * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
 * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
 * </code>
 *
 * An empty needle always matches; an empty haystack only matches an empty needle.
 *
 * @param string $haystack The string to search in.
 * @param string $needle   The substring to search for.
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function str_ends_with(string $haystack, string $needle): bool
{
    if ($needle === '') {
        return true;
    }

    if ($haystack === '') {
        return false;
    }

    // before PHP 8 there is no native function: compare the trailing bytes
    if (\PHP_VERSION_ID < 80000) {
        $tail = \substr($haystack, -\strlen($needle));

        return $tail === $needle;
    }

    /** @phpstan-ignore-next-line - only for PHP8 */
    return \str_ends_with($haystack, $needle);
}
/**
 * Returns true if the string ends with any of $substrings, false otherwise.
 *
 * - case-sensitive
 *
 * The comparison is done on the trailing bytes of $str.
 *
 * @param string   $str        The input string.
 * @param string[] $substrings Substrings to look for.
 *
 * @psalm-pure
 *
 * @return bool
 *              Whether or not $str ends with $substring.
 */
public static function str_ends_with_any(string $str, array $substrings): bool
{
    // an empty list never enters the loop and therefore yields false
    foreach ($substrings as $candidate) {
        $tail = \substr($str, -\strlen($candidate));
        if ($tail === $candidate) {
            return true;
        }
    }

    return false;
}
/**
 * Ensures that the string begins with $substring. If it doesn't, it's
 * prepended.
 *
 * @param string $str       The input string.
 * @param string $substring The substring to add if not present.
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_ensure_left(string $str, string $substring): string
{
    $already_prefixed = $substring !== '' && \strpos($str, $substring) === 0;

    return $already_prefixed ? $str : $substring . $str;
}
/**
 * Ensures that the string ends with $substring. If it doesn't, it's appended.
 *
 * @param string $str       The input string.
 * @param string $substring The substring to add if not present.
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_ensure_right(string $str, string $substring): string
{
    $already_suffixed = $str !== ''
                        && $substring !== ''
                        && \substr($str, -\strlen($substring)) === $substring;

    return $already_suffixed ? $str : $str . $substring;
}
/**
 * Capitalizes the first word of the string, replaces underscores with
 * spaces, and strips '_id'.
 *
 * '_id' is removed first, afterwards every remaining underscore becomes a
 * space; the trimmed result is passed to "UTF8::ucfirst()".
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_humanize($str): string
{
    // search => replacement, applied in this order
    $replacements = [
        '_id' => '',
        '_'   => ' ',
    ];

    $str = \str_replace(
        \array_keys($replacements),
        \array_values($replacements),
        $str
    );

    return self::ucfirst(\trim($str));
}
/**
 * alias for "UTF8::str_istarts_with()"
 *
 * Forwards both arguments unchanged to "UTF8::str_istarts_with()".
 *
 * @param string $haystack
 * @param string $needle
 *
 * @psalm-pure
 *
 * @return bool
 *
 * @see        UTF8::str_istarts_with()
 * @deprecated please use "UTF8::str_istarts_with()"
 */
public static function str_ibegins(string $haystack, string $needle): bool
{
    $starts_with_needle = self::str_istarts_with($haystack, $needle);

    return $starts_with_needle;
}
/**
 * alias for "UTF8::str_iends_with()"
 *
 * Forwards both arguments unchanged to "UTF8::str_iends_with()".
 *
 * @param string $haystack
 * @param string $needle
 *
 * @psalm-pure
 *
 * @return bool
 *
 * @see        UTF8::str_iends_with()
 * @deprecated please use "UTF8::str_iends_with()"
 */
public static function str_iends(string $haystack, string $needle): bool
{
    $ends_with_needle = self::str_iends_with($haystack, $needle);

    return $ends_with_needle;
}
/**
 * Check if the string ends with the given substring, case-insensitive.
 *
 * EXAMPLE: <code>
 * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
 * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
 * </code>
 *
 * An empty needle always matches; an empty haystack only matches an empty needle.
 *
 * @param string $haystack The string to search in.
 * @param string $needle   The substring to search for.
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function str_iends_with(string $haystack, string $needle): bool
{
    if ($needle === '') {
        return true;
    }

    if ($haystack === '') {
        return false;
    }

    // take as many trailing bytes as the needle has and compare them
    // via "UTF8::strcasecmp()"
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
}
/**
 * Returns true if the string ends with any of $substrings, false otherwise.
 *
 * - case-insensitive
 *
 * Each substring is checked via "UTF8::str_iends_with()".
 *
 * @param string   $str        The input string.
 * @param string[] $substrings Substrings to look for.
 *
 * @psalm-pure
 *
 * @return bool
 *              Whether or not $str ends with $substring.
 */
public static function str_iends_with_any(string $str, array $substrings): bool
{
    // an empty list never enters the loop and therefore yields false
    foreach ($substrings as $candidate) {
        if (self::str_iends_with($str, $candidate)) {
            return true;
        }
    }

    return false;
}
/**
 * Returns the index of the first occurrence of $needle in the string,
 * and false if not found. Accepts an optional offset from which to begin
 * the search.
 *
 * Forwards all arguments unchanged to "UTF8::stripos()".
 *
 * @param string $str      The input string.
 * @param string $needle   Substring to look for.
 * @param int    $offset   [optional] Offset from which to search. Default: 0
 * @param string $encoding [optional] Set the charset for e.g. "mb_" function
 *
 * @psalm-pure
 *
 * @return false|int
 *                   The occurrence's index if found, otherwise false.
 *
 * @see        UTF8::stripos()
 * @deprecated please use "UTF8::stripos()"
 */
public static function str_iindex_first(
    string $str,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8'
) {
    $position = self::stripos($str, $needle, $offset, $encoding);

    return $position;
}
/**
 * Returns the index of the last occurrence of $needle in the string,
 * and false if not found. Accepts an optional offset from which to begin
 * the search. Offsets may be negative to count from the last character
 * in the string.
 *
 * Forwards all arguments unchanged to "UTF8::strripos()".
 *
 * @param string $str      The input string.
 * @param string $needle   Substring to look for.
 * @param int    $offset   [optional] Offset from which to search. Default: 0
 * @param string $encoding [optional] Set the charset for e.g. "mb_" function
 *
 * @psalm-pure
 *
 * @return false|int
 *                   The last occurrence's index if found, otherwise false.
 *
 * @see        UTF8::strripos()
 * @deprecated please use "UTF8::strripos()"
 */
public static function str_iindex_last(
    string $str,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8'
) {
    $position = self::strripos($str, $needle, $offset, $encoding);

    return $position;
}
/**
 * Returns the index of the first occurrence of $needle in the string,
 * and false if not found. Accepts an optional offset from which to begin
 * the search.
 *
 * Forwards all arguments unchanged to "UTF8::strpos()".
 *
 * @param string $str      The input string.
 * @param string $needle   Substring to look for.
 * @param int    $offset   [optional] Offset from which to search. Default: 0
 * @param string $encoding [optional] Set the charset for e.g. "mb_" function
 *
 * @psalm-pure
 *
 * @return false|int
 *                   The occurrence's index if found, otherwise false.
 *
 * @see        UTF8::strpos()
 * @deprecated please use "UTF8::strpos()"
 */
public static function str_index_first(
    string $str,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8'
) {
    $position = self::strpos($str, $needle, $offset, $encoding);

    return $position;
}
/**
* Returns the index of the last occurrence of $needle in the string,
* and false if not found. Accepts an optional offset from which to begin
* the search. Offsets may be negative to count from the last character
* in the string.
*
* @param string $str The input string.
* @param string $needle Substring to look for.
* @param int $offset [optional] Offset from which to search. Default: 0
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return false|int
* The last occurrence's index if found, otherwise false.
*
* @see UTF8::strrpos()
* @deprecated please use "UTF8::strrpos()"
*/
public static function str_index_last(
    string $str,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8'
) {
    // Deprecated alias: every argument is forwarded unchanged to strrpos().
    $last_index = self::strrpos($str, $needle, $offset, $encoding);

    return $last_index;
}
/**
* Inserts $substring into the string at the $index provided.
*
* @param string $str The input string.
* @param string $substring String to be inserted.
* @param int $index The index at which to insert the substring.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
*/
public static function str_insert(
    string $str,
    string $substring,
    int $index,
    string $encoding = 'UTF-8'
): string {
    // The literal 'UTF-8' takes the direct mbstring route, everything else
    // goes through the class helpers with a normalized encoding name.
    $is_utf8 = $encoding === 'UTF-8';

    if ($is_utf8) {
        $char_count = (int) \mb_strlen($str);
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $char_count = (int) self::strlen($str, $encoding);
    }

    // An index past the end of the string leaves the input untouched.
    if ($index > $char_count) {
        return $str;
    }

    if ($is_utf8) {
        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $char_count);
    } else {
        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $char_count, $encoding);
    }

    return $head . $substring . $tail;
}
/**
* Case-insensitive and UTF-8 safe version of str_replace.
*
* EXAMPLE:
* UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
*
*
* @see http://php.net/manual/en/function.str-ireplace.php
*
* @param string|string[] $search
* Every replacement with search array is
* performed on the result of previous replacement.
*
* @param string|string[] $replacement The replacement.
* @param string|string[] $subject
* If subject is an array, then the search and
* replace is performed with every entry of
* subject, and the return value is an array as
* well.
*
* @param int $count [optional]
* The number of matched and replaced needles will
* be returned in count which is passed by
* reference.
*
*
* @psalm-pure
*
* @return string|string[]
* A string or an array of replacements.
*
* @template TStrIReplaceSubject
* @phpstan-param TStrIReplaceSubject $subject
* @phpstan-return TStrIReplaceSubject
*/
public static function str_ireplace($search, $replacement, $subject, &$count = null)
{
    // Build one case-insensitive, UTF-8 aware pattern per search term.
    // (A fresh array is filled instead of iterating by reference, so no
    // dangling reference is left behind.)
    $patterns = [];
    foreach ((array) $search as $term) {
        $term = (string) $term;
        // An empty needle must never match: "^" combined with a lookbehind
        // that requires a preceding character cannot be satisfied.
        $patterns[] = $term === ''
            ? '/^(?<=.)$/'
            : '/' . \preg_quote($term, '/') . '/ui';
    }

    // fallback
    /** @phpstan-ignore-next-line - only a fallback for PHP8 */
    if ($replacement === null) {
        $replacement = '';
    }

    /** @phpstan-ignore-next-line - only a fallback for PHP8 */
    if ($subject === null) {
        $subject = '';
    }

    // preg_replace() interprets "$n", "${n}" and "\n" inside the replacement
    // as backreferences. The search terms are quoted literals, so the
    // replacement has to be treated literally as well (like str_ireplace()):
    // escape every backslash and dollar sign.
    $escape = static function ($value): string {
        return \addcslashes((string) $value, '\\$');
    };
    $replacement = \is_array($replacement)
        ? \array_map($escape, $replacement)
        : $escape($replacement);

    /**
     * @psalm-suppress PossiblyNullArgument
     * @phpstan-var TStrIReplaceSubject $subject
     */
    $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

    return $subject;
}
/**
* Replaces $search from the beginning of string with $replacement.
*
* @param string $str The input string.
* @param string $search The string to search for.
* @param string $replacement The replacement.
*
* @psalm-pure
*
* @return string
* The string after the replacement.
*/
public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
{
    // Without a search term the replacement is simply appended
    // (this also covers the "everything empty" combinations).
    if ($search === '') {
        return $str . $replacement;
    }

    // Nothing can be found at the beginning of an empty string.
    if ($str === '') {
        return '';
    }

    // Fast path: byte-wise comparison that only ignores ASCII case.
    $search_bytes = \strlen($search);
    if (\strncasecmp($str, $search, $search_bytes) === 0) {
        return $replacement . \substr($str, $search_bytes);
    }

    // strncasecmp() cannot fold multibyte letters ("Ö" vs "ö"), so retry with
    // the multibyte functions. Only done for valid UTF-8, so that invalid
    // bytes cannot produce a false match.
    if (\mb_check_encoding($str, 'UTF-8') && \mb_check_encoding($search, 'UTF-8')) {
        $search_chars = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $search_chars, 'UTF-8');
        if (\mb_stripos($prefix, $search, 0, 'UTF-8') === 0) {
            return $replacement . \mb_substr($str, $search_chars, null, 'UTF-8');
        }
    }

    return $str;
}
/**
* Replaces $search from the ending of string with $replacement.
*
* @param string $str The input string.
* @param string $search The string to search for.
* @param string $replacement The replacement.
*
* @psalm-pure
*
* @return string
* The string after the replacement.
*/
public static function str_ireplace_ending(string $str, string $search, string $replacement): string
{
    // Without a search term the replacement is simply appended
    // (this also covers the "everything empty" combinations).
    if ($search === '') {
        return $str . $replacement;
    }

    // Nothing can be found at the end of an empty string.
    if ($str === '') {
        return '';
    }

    $str_bytes = \strlen($str);
    $search_bytes = \strlen($search);

    // Fast path: byte-wise search that only ignores ASCII case. The length
    // guard keeps the offset non-negative; a negative offset that reaches
    // past the start of the haystack makes stripos() throw a ValueError on
    // PHP 8 (and emit a warning on PHP 7).
    if ($search_bytes <= $str_bytes) {
        $suffix_start = $str_bytes - $search_bytes;
        if (\stripos($str, $search, $suffix_start) !== false) {
            return \substr($str, 0, $suffix_start) . $replacement;
        }
    }

    // stripos() cannot fold multibyte letters ("Ö" vs "ö"), so retry with the
    // multibyte functions. Only done for valid UTF-8, so that invalid bytes
    // cannot produce a false match.
    if (\mb_check_encoding($str, 'UTF-8') && \mb_check_encoding($search, 'UTF-8')) {
        $str_chars = (int) \mb_strlen($str, 'UTF-8');
        $search_chars = (int) \mb_strlen($search, 'UTF-8');
        if ($search_chars <= $str_chars) {
            $keep_chars = $str_chars - $search_chars;
            $suffix = (string) \mb_substr($str, $keep_chars, null, 'UTF-8');
            if (\mb_stripos($suffix, $search, 0, 'UTF-8') === 0) {
                return \mb_substr($str, 0, $keep_chars, 'UTF-8') . $replacement;
            }
        }
    }

    return $str;
}
/**
* Check if the string starts with the given substring, case-insensitive.
*
* EXAMPLE:
* UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
* UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
*
*
* @param string $haystack The string to search in.
* @param string $needle The substring to search for.
*
* @psalm-pure
*
* @return bool
*/
public static function str_istarts_with(string $haystack, string $needle): bool
{
    // Every string, even an empty one, starts with the empty needle.
    if ($needle === '') {
        return true;
    }

    // An empty haystack cannot start with a non-empty needle; otherwise the
    // case-insensitive match has to sit at index 0.
    return $haystack !== '' && self::stripos($haystack, $needle) === 0;
}
/**
* Returns true if the string begins with any of $substrings, false otherwise.
*
* - case-insensitive
*
* @param string $str The input string.
* @param array $substrings Substrings to look for.
*
* @psalm-pure
*
* @return bool
* Whether or not $str starts with $substring.
*/
public static function str_istarts_with_any(string $str, array $substrings): bool
{
    // Neither an empty input nor an empty candidate list can produce a hit.
    if ($str === '' || $substrings === []) {
        return false;
    }

    // The first candidate that matches case-insensitively decides the result.
    foreach ($substrings as $candidate) {
        if (self::str_istarts_with($str, $candidate)) {
            return true;
        }
    }

    return false;
}
/**
* Gets the substring after the first occurrence of a separator.
*
* @param string $str The input string.
* @param string $separator The string separator.
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_isubstr_after_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // Search with the caller's encoding so the offset is measured in the same
    // unit as the substring extraction below; the original call omitted it.
    // NOTE(review): assumes stripos() accepts ($haystack, $needle, $offset, $encoding)
    // in the same order as the strripos() call used elsewhere in this class — confirm.
    $offset = self::stripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        // Skip the separator itself and return the rest of the string.
        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
/**
* Gets the substring after the last occurrence of a separator.
*
* @param string $str The input string.
* @param string $separator The string separator.
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_isubstr_after_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // Search with the caller's encoding so the offset is measured in the same
    // unit as the substring extraction below; the original call omitted it.
    $offset = self::strripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        // Skip the separator itself and return the rest of the string.
        return (string) \mb_substr(
            $str,
            $offset + (int) self::strlen($separator)
        );
    }

    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
/**
* Gets the substring before the first occurrence of a separator.
*
* @param string $str The input string.
* @param string $separator The string separator.
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_isubstr_before_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // Search with the caller's encoding so the offset is measured in the same
    // unit as the substring extraction below; the original call omitted it.
    // NOTE(review): assumes stripos() accepts ($haystack, $needle, $offset, $encoding)
    // in the same order as the strripos() call used elsewhere in this class — confirm.
    $offset = self::stripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr($str, 0, $offset);
    }

    return (string) self::substr($str, 0, $offset, $encoding);
}
/**
* Gets the substring before the last occurrence of a separator.
*
* @param string $str The input string.
* @param string $separator The string separator.
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_isubstr_before_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    // Locate the last case-insensitive occurrence of the separator.
    $last_position = $is_utf8
        ? \mb_strripos($str, $separator)
        : self::strripos($str, $separator, 0, $encoding);

    if ($last_position === false) {
        return '';
    }

    // Everything in front of that occurrence is the result.
    if ($is_utf8) {
        return (string) \mb_substr($str, 0, $last_position);
    }

    return (string) self::substr($str, 0, $last_position, $encoding);
}
/**
* Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
*
* @param string $str The input string.
* @param string $needle The string to look for.
* @param bool $before_needle [optional] Default: false
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_isubstr_first(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $needle === '') {
        return '';
    }

    $found = self::stristr($str, $needle, $before_needle, $encoding);

    // stristr() reports "not found" as false; callers of this method get ''.
    return $found === false ? '' : $found;
}
/**
* Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
*
* @param string $str The input string.
* @param string $needle The string to look for.
* @param bool $before_needle [optional] Default: false
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_isubstr_last(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $needle === '') {
        return '';
    }

    $found = self::strrichr($str, $needle, $before_needle, $encoding);

    // strrichr() reports "not found" as false; callers of this method get ''.
    return $found === false ? '' : $found;
}
/**
* Returns the last $n characters of the string.
*
* @param string $str The input string.
* @param int $n Number of characters to retrieve from the end.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
*/
public static function str_last_char(
    string $str,
    int $n = 1,
    string $encoding = 'UTF-8'
): string {
    // Nothing to return for an empty input or a non-positive count.
    if ($n <= 0 || $str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, -$n, null, $encoding);
    }

    // A negative start counts from the end of the string.
    return (string) \mb_substr($str, -$n);
}
/**
* Limit the number of characters in a string.
*
* @param string $str The input string.
* @param int $length [optional] Default: 100
* @param string $str_add_on [optional] Default: …
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
*/
public static function str_limit(
    string $str,
    int $length = 100,
    string $str_add_on = '…',
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $length <= 0) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        // Short enough already: return the input untouched.
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // Reserve room for the add-on. The value is clamped at 0 because a
        // negative length would make mb_substr() count from the END of the
        // string and return far more than $length characters.
        $keep = (int) \max(0, $length - (int) \mb_strlen($str_add_on));

        /** @noinspection UnnecessaryCastingInspection */
        return (string) \mb_substr($str, 0, $keep) . $str_add_on;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ((int) self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // Same clamping as above for the non-UTF-8 path.
    $keep = (int) \max(0, $length - (int) self::strlen($str_add_on));

    return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
}
/**
* Limit the number of characters in a string, but also after the next word.
*
* EXAMPLE: UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'
*
* @param string $str The input string.
* @param int $length [optional] Default: 100
* @param string $str_add_on [optional] Default: …
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
*/
public static function str_limit_after_word(
    string $str,
    int $length = 100,
    string $str_add_on = '…',
    string $encoding = 'UTF-8'
): string {
    if ($length <= 0 || $str === '') {
        return '';
    }

    // The literal 'UTF-8' uses mbstring directly, everything else the class helpers.
    $is_utf8 = $encoding === 'UTF-8';

    $char_count = $is_utf8
        ? (int) \mb_strlen($str)
        : (int) self::strlen($str, $encoding);

    // Short enough already: return the input untouched.
    if ($char_count <= $length) {
        return $str;
    }

    // The limit falls exactly on a space: cut right in front of it.
    $boundary_char = $is_utf8
        ? \mb_substr($str, $length - 1, 1)
        : self::substr($str, $length - 1, 1, $encoding);

    if ($boundary_char === ' ') {
        $head = $is_utf8
            ? \mb_substr($str, 0, $length - 1)
            : self::substr($str, 0, $length - 1, $encoding);

        return ((string) $head) . $str_add_on;
    }

    $truncated = $is_utf8
        ? \mb_substr($str, 0, $length)
        : self::substr($str, 0, $length, $encoding);

    // Only the helper-based path treats a failed truncation specially.
    if (!$is_utf8 && $truncated === false) {
        return '' . $str_add_on;
    }

    // A negative limit makes explode() drop the last (possibly cut-off) word.
    $whole_words = \implode(' ', \explode(' ', $truncated, -1));

    if ($whole_words === '') {
        // No complete word fits: fall back to a hard cut one character short of the limit.
        $fallback = $is_utf8
            ? \mb_substr($truncated, 0, $length - 1)
            : self::substr($truncated, 0, $length - 1, $encoding);

        return ((string) $fallback) . $str_add_on;
    }

    return $whole_words . $str_add_on;
}
/**
* Returns the longest common prefix between the $str1 and $str2.
*
* @param string $str1 The input string.
* @param string $str2 Second string for comparison.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
*/
public static function str_longest_common_prefix(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    $is_utf8 = $encoding === 'UTF-8';

    // The prefix can never be longer than the shorter of the two strings.
    if ($is_utf8) {
        $limit = (int) \min(
            \mb_strlen($str1),
            \mb_strlen($str2)
        );
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );
    }

    $prefix = '';
    $position = 0;

    // Walk both strings from the front until the characters differ.
    while ($position < $limit) {
        $char = $is_utf8
            ? \mb_substr($str1, $position, 1)
            : self::substr($str1, $position, 1, $encoding);

        if ($char === false) {
            break;
        }

        $other_char = $is_utf8
            ? \mb_substr($str2, $position, 1)
            : self::substr($str2, $position, 1, $encoding);

        if ($char !== $other_char) {
            break;
        }

        $prefix .= $char;
        ++$position;
    }

    return $prefix;
}
/**
* Returns the longest common substring between the $str1 and $str2.
* In the case of ties, it returns that which occurs first.
*
* @param string $str1
* @param string $str2 Second string for comparison.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
* The longest common substring.
*/
public static function str_longest_common_substring(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    if ($str1 === '' || $str2 === '') {
        return '';
    }

    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $is_utf8 = $encoding === 'UTF-8';

    if ($is_utf8) {
        $str_length = (int) \mb_strlen($str1);
        $other_length = (int) \mb_strlen($str2);
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $str_length = (int) self::strlen($str1, $encoding);
        $other_length = (int) self::strlen($str2, $encoding);
    }

    // Return if either string is empty
    if ($str_length === 0 || $other_length === 0) {
        return '';
    }

    // Extract the characters of $str2 once, instead of once per cell of the
    // DP table (the extraction does not depend on the outer loop).
    $other_chars = [];
    for ($j = 0; $j < $other_length; ++$j) {
        $other_chars[$j] = $is_utf8
            ? \mb_substr($str2, $j, 1)
            : self::substr($str2, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // 1-based end position of that match inside $str1

    // Each row only depends on the previous one, so two rows are enough.
    $previous_row = \array_fill(0, $other_length + 1, 0);

    for ($i = 1; $i <= $str_length; ++$i) {
        // Loop-invariant for the inner loop, hence extracted here.
        $str_char = $is_utf8
            ? \mb_substr($str1, $i - 1, 1)
            : self::substr($str1, $i - 1, 1, $encoding);

        $current_row = [0];

        for ($j = 1; $j <= $other_length; ++$j) {
            if ($str_char === $other_chars[$j - 1]) {
                $run = $previous_row[$j - 1] + 1;
                // Strictly greater: on ties the first match wins.
                if ($run > $len) {
                    $len = $run;
                    $end = $i;
                }
            } else {
                $run = 0;
            }

            $current_row[$j] = $run;
        }

        $previous_row = $current_row;
    }

    if ($is_utf8) {
        return (string) \mb_substr($str1, $end - $len, $len);
    }

    return (string) self::substr($str1, $end - $len, $len, $encoding);
}
/**
* Returns the longest common suffix between the $str1 and $str2.
*
* @param string $str1
* @param string $str2 Second string for comparison.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
*/
public static function str_longest_common_suffix(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    if ($str1 === '' || $str2 === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    // The suffix can never be longer than the shorter of the two strings.
    if ($is_utf8) {
        $limit = (int) \min(
            \mb_strlen($str1, $encoding),
            \mb_strlen($str2, $encoding)
        );
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );
    }

    $suffix = '';
    $distance = 1; // distance from the end of both strings

    // Walk both strings from the back until the characters differ.
    while ($distance <= $limit) {
        $char = $is_utf8
            ? \mb_substr($str1, -$distance, 1)
            : self::substr($str1, -$distance, 1, $encoding);

        if ($char === false) {
            break;
        }

        $other_char = $is_utf8
            ? \mb_substr($str2, -$distance, 1)
            : self::substr($str2, -$distance, 1, $encoding);

        if ($char !== $other_char) {
            break;
        }

        // Characters are found back to front, so prepend.
        $suffix = $char . $suffix;
        ++$distance;
    }

    return $suffix;
}
/**
* Returns true if $str matches the supplied pattern, false otherwise.
*
* @param string $str The input string.
* @param string $pattern Regex pattern to match against.
*
* @psalm-pure
*
* @return bool
* Whether or not $str matches the pattern.
*/
public static function str_matches_pattern(string $str, string $pattern): bool
{
    // The caller supplies the bare expression; delimiters and the "u"
    // (UTF-8) modifier are added here.
    $regex = '/' . $pattern . '/u';

    // preg_match() yields 1 on a match, 0 on no match and false on error.
    return \preg_match($regex, $str) === 1;
}
/**
* Returns whether or not a character exists at an index. Offsets may be
* negative to count from the last character in the string. Implements
* part of the ArrayAccess interface.
*
* @param string $str The input string.
* @param int $offset The index to check.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return bool
* Whether or not the index exists.
*/
public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
{
    $char_count = (int) self::strlen($str, $encoding);

    // Non-negative offsets must lie below the length; negative offsets may
    // reach back at most "length" characters from the end.
    return $offset >= 0
        ? $offset < $char_count
        : \abs($offset) <= $char_count;
}
/**
* Returns the character at the given index. Offsets may be negative to
* count from the last character in the string. Implements part of the
* ArrayAccess interface, and throws an OutOfBoundsException if the index
* does not exist.
*
* @param string $str The input string.
* @param int $index The index from which to retrieve the char.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @throws \OutOfBoundsException if the positive or negative offset does not exist
*
* @return string
* The character at the specified index.
*
* @psalm-pure
*/
public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
{
    // Measure the string in the requested encoding, exactly like
    // str_offset_exists() does; the original call ignored $encoding, so the
    // bounds check and the char_at() lookup below could disagree.
    $length = (int) self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
        throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
}
/**
* Pad a UTF-8 string to a given length with another string.
*
* EXAMPLE: UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'
*
* @param string $str The input string.
* @param int $pad_length The length of return string.
* @param string $pad_string [optional] String to use for padding the input string.
* @param int|string $pad_type [optional]
* Can be STR_PAD_RIGHT (default), [or string "right"]
* STR_PAD_LEFT [or string "left"] or
* STR_PAD_BOTH [or string "both"]
*
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
* Returns the padded string.
*/
public static function str_pad(
    string $str,
    int $pad_length,
    string $pad_string = ' ',
    $pad_type = \STR_PAD_RIGHT,
    string $encoding = 'UTF-8'
): string {
    if ($pad_string === '' || $pad_length === 0) {
        return $str;
    }

    // Anything that is not already an integer has to be one of the textual aliases.
    if ($pad_type !== (int) $pad_type) {
        $aliases = [
            'left'  => \STR_PAD_LEFT,
            'right' => \STR_PAD_RIGHT,
            'both'  => \STR_PAD_BOTH,
        ];

        if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
            throw new \InvalidArgumentException(
                'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
            );
        }

        $pad_type = $aliases[$pad_type];
    }

    // The literal 'UTF-8' uses mbstring directly, everything else goes
    // through the class helpers with a normalized encoding name.
    $is_utf8 = $encoding === 'UTF-8';
    if (!$is_utf8) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Character count of a string in the active encoding.
    $count_chars = static function (string $value) use ($is_utf8, $encoding): int {
        return $is_utf8
            ? (int) \mb_strlen($value)
            : (int) self::strlen($value, $encoding);
    };

    // First $chars characters of a string in the active encoding.
    $take_chars = static function (string $value, int $chars) use ($is_utf8, $encoding): string {
        return $is_utf8
            ? (string) \mb_substr($value, 0, $chars)
            : (string) self::substr($value, 0, $chars, $encoding);
    };

    $str_length = $count_chars($str);

    // The input already has the requested length (or more).
    if ($pad_length < $str_length) {
        return $str;
    }

    $missing = $pad_length - $str_length;

    if ($pad_type === \STR_PAD_BOTH) {
        // An odd remainder puts the extra character on the right side.
        $left_chars = (int) \floor($missing / 2);
        $right_chars = (int) \ceil($missing / 2);

        return $take_chars(\str_repeat($pad_string, $left_chars), $left_chars)
            . $str
            . $take_chars(\str_repeat($pad_string, $right_chars), $right_chars);
    }

    // One-sided padding: repeat the pad string often enough, then cut it to size.
    $repeats = (int) \ceil($missing / $count_chars($pad_string));
    $padding = $take_chars(\str_repeat($pad_string, $repeats), $missing);

    // Every integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right.
    return $pad_type === \STR_PAD_LEFT
        ? $padding . $str
        : $str . $padding;
}
/**
* Returns a new string of a given length such that both sides of the
* string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
*
* @param string $str
* @param int $length Desired string length after padding.
* @param string $pad_str [optional] String used to pad, defaults to space. Default: ' '
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
* The string with padding applied.
*/
public static function str_pad_both(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    // Convenience alias: str_pad() with the pad type fixed to STR_PAD_BOTH.
    $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);

    return $padded;
}
/**
* Returns a new string of a given length such that the beginning of the
* string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
*
* @param string $str
* @param int $length Desired string length after padding.
* @param string $pad_str [optional] String used to pad, defaults to space. Default: ' '
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
* The string with left padding.
*/
public static function str_pad_left(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    // Convenience alias: str_pad() with the pad type fixed to STR_PAD_LEFT.
    $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);

    return $padded;
}
/**
* Returns a new string of a given length such that the end of the string
* is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
*
* @param string $str
* @param int $length Desired string length after padding.
* @param string $pad_str [optional] String used to pad, defaults to space. Default: ' '
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
* The string with right padding.
*/
public static function str_pad_right(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    // Convenience alias: str_pad() with the pad type fixed to STR_PAD_RIGHT.
    $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);

    return $padded;
}
/**
* Repeat a string.
*
* EXAMPLE: UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'
*
* @param string $str
* The string to be repeated.
*
* @param int $multiplier
* Number of times the input string should be
* repeated.
*
*
* multiplier has to be greater than or equal to 0.
* If the multiplier is set to 0, the function
* will return an empty string.
*
*
* @psalm-pure
*
* @return string
* The repeated string.
*/
public static function str_repeat(string $str, int $multiplier): string
{
    // The input is passed through filter() first; the native function then
    // does the actual repetition.
    return \str_repeat(self::filter($str), $multiplier);
}
/**
* INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
*
* Replace all occurrences of the search string with the replacement string
*
* @see http://php.net/manual/en/function.str-replace.php
*
* @param string|string[] $search
* The value being searched for, otherwise known as the needle.
* An array may be used to designate multiple needles.
*
* @param string|string[] $replace
* The replacement value that replaces found search
* values. An array may be used to designate multiple replacements.
*
* @param string|string[] $subject
* The string or array of strings being searched and replaced on,
* otherwise known as the haystack.
*
*
* If subject is an array, then the search and
* replace is performed with every entry of
* subject, and the return value is an array as
* well.
*
* @param int|null $count [optional]
* If passed, this will hold the number of matched and replaced needles.
*
*
* @psalm-pure
*
* @return string|string[]
* This function returns a string or an array with the replaced values.
*
* @template TStrReplaceSubject
* @phpstan-param TStrReplaceSubject $subject
* @phpstan-return TStrReplaceSubject
*
* @deprecated please use \str_replace() instead
*/
public static function str_replace(
    $search,
    $replace,
    $subject,
    int &$count = null
) {
    // Plain pass-through to the native function; $count is handed over by
    // reference so the caller receives the number of replacements.
    /**
     * @psalm-suppress PossiblyNullArgument
     * @phpstan-var TStrReplaceSubject $replaced;
     */
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
}
/**
* Replaces $search from the beginning of string with $replacement.
*
* @param string $str The input string.
* @param string $search The string to search for.
* @param string $replacement The replacement.
*
* @psalm-pure
*
* @return string
* A string after the replacements.
*/
public static function str_replace_beginning(
    string $str,
    string $search,
    string $replacement
): string {
    // Without a search term the replacement is simply appended
    // (this also covers the "everything empty" combinations).
    if ($search === '') {
        return $str . $replacement;
    }

    // Nothing can be found at the beginning of an empty string.
    if ($str === '') {
        return '';
    }

    $search_bytes = \strlen($search);

    // Byte-wise, case-sensitive comparison of the leading bytes.
    if (\strncmp($str, $search, $search_bytes) !== 0) {
        return $str;
    }

    return $replacement . \substr($str, $search_bytes);
}
/**
* Replaces $search from the ending of string with $replacement.
*
* @param string $str The input string.
* @param string $search The string to search for.
* @param string $replacement The replacement.
*
* @psalm-pure
*
* @return string
* A string after the replacements.
*/
public static function str_replace_ending(
    string $str,
    string $search,
    string $replacement
): string {
    // Without a search term the replacement is simply appended
    // (this also covers the "everything empty" combinations).
    if ($search === '') {
        return $str . $replacement;
    }

    $str_bytes = \strlen($str);
    $search_bytes = \strlen($search);

    // A search term longer than the string cannot be its ending. Bailing out
    // here also keeps the offset below non-negative: a negative offset that
    // reaches past the start of the haystack makes strpos() throw a
    // ValueError on PHP 8 (and emit a warning on PHP 7).
    if ($search_bytes > $str_bytes) {
        return $str;
    }

    // Searching from this offset can only match at the very end.
    $suffix_start = $str_bytes - $search_bytes;
    if (\strpos($str, $search, $suffix_start) !== false) {
        return \substr($str, 0, $suffix_start) . $replacement;
    }

    return $str;
}
/**
* Replace the first "$search"-term with the "$replace"-term.
*
* @param string $search
* @param string $replace
* @param string $subject
*
* @psalm-pure
*
* @return string
*
* @psalm-suppress InvalidReturnType
*/
public static function str_replace_first(
    string $search,
    string $replace,
    string $subject
): string {
    $position = self::strpos($subject, $search);

    // Not found: hand the subject back unchanged.
    if ($position === false) {
        return $subject;
    }

    // Swap exactly the matched span for the replacement.
    $search_length = (int) self::strlen($search);

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return self::substr_replace($subject, $replace, $position, $search_length);
}
/**
* Replace the last "$search"-term with the "$replace"-term.
*
* @param string $search
* @param string $replace
* @param string $subject
*
* @psalm-pure
*
* @return string
*
* @psalm-suppress InvalidReturnType
*/
public static function str_replace_last(
    string $search,
    string $replace,
    string $subject
): string {
    $position = self::strrpos($subject, $search);

    // Not found: hand the subject back unchanged.
    if ($position === false) {
        return $subject;
    }

    // Swap exactly the matched span for the replacement.
    $search_length = (int) self::strlen($search);

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return self::substr_replace($subject, $replace, $position, $search_length);
}
/**
* Shuffles all the characters in the string.
*
* INFO: uses random algorithm which is weak for cryptography purposes
*
* EXAMPLE: UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '
*
* @param string $str The input string
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @return string
* The shuffled string.
*/
public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
{
    $is_utf8 = $encoding === 'UTF-8';

    if ($is_utf8) {
        $last_index = (int) \mb_strlen($str) - 1;
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $last_index = (int) self::strlen($str, $encoding) - 1;
    }

    // Shuffle the character positions rather than the characters themselves.
    $positions = \range(0, $last_index);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($positions);

    // Rebuild the string by reading one character per shuffled position.
    $shuffled_str = '';
    foreach ($positions as $position) {
        $char = $is_utf8
            ? \mb_substr($str, $position, 1)
            : self::substr($str, $position, 1, $encoding);

        if ($char !== false) {
            $shuffled_str .= $char;
        }
    }

    return $shuffled_str;
}
/**
* Returns the substring beginning at $start, and up to, but not including
* the index specified by $end. If $end is omitted, the function extracts
* the remaining string. If $end is negative, it is computed from the end
* of the string.
*
* @param string $str
* @param int $start Initial index from which to begin extraction.
* @param int|null $end [optional] Index at which to end extraction. Default: null
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return false|string
* The extracted substring. If str is shorter than start
* characters long, FALSE will be returned.
*/
public static function str_slice(
    string $str,
    int $start,
    int $end = null,
    string $encoding = 'UTF-8'
) {
    // An end index at or in front of the start index selects nothing.
    if ($end !== null && $end >= 0 && $end <= $start) {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';
    if (!$is_utf8) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($end === null) {
        // No end given: take everything from $start on. A negative $start is
        // resolved by the substr call itself.
        $length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
    } else {
        // Turn negative indexes into absolute positions first. Computing the
        // length from raw negative values returned too many characters for a
        // negative $start, and for a negative $end reaching past the start
        // of the string.
        if ($start < 0 || $end < 0) {
            $str_length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            if ($start < 0) {
                $start = (int) \max(0, $str_length + $start);
            }
            if ($end < 0) {
                $end = (int) \max(0, $str_length + $end);
            }
        }

        if ($end <= $start) {
            return '';
        }

        $length = $end - $start;
    }

    if ($is_utf8) {
        return \mb_substr($str, $start, $length);
    }

    return self::substr($str, $start, $length, $encoding);
}
/**
* Convert a string to e.g.: "snake_case"
*
* @param string $str
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
* A string in snake_case.
*/
public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    // Dashes count as word separators as well.
    $str = \str_replace(
        '-',
        '_',
        self::normalize_whitespace($str)
    );

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Put a "_" in front of every number character and every uppercase
    // letter. The class is "[\p{N}\p{Lu}]": inside a character class "|" is
    // a literal pipe, not an alternation, so the former "[\p{N}|\p{Lu}]"
    // also (unintentionally) matched "|".
    $str = (string) \preg_replace_callback(
        '/([\\p{N}\\p{Lu}])/u',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            $match = $matches[1];
            $match_int = (int) $match;

            // Digits that survive the int round-trip are isolated on both sides.
            if ((string) $match_int === $match) {
                return '_' . $match . '_';
            }

            if ($encoding === 'UTF-8') {
                return '_' . \mb_strtolower($match);
            }

            return '_' . self::strtolower($match, $encoding);
        },
        $str
    );

    // The former second pattern (trim leading/trailing whitespace) could
    // never match after the first one had converted all whitespace, so it
    // was dropped; leading/trailing "_" are removed by the trim() below.
    $str = (string) \preg_replace(
        [
            '/\\s+/u', // convert spaces to "_"
            '/_+/',    // remove double "_"
        ],
        '_',
        $str
    );

    return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
}
/**
* Sort all characters according to code points.
*
* EXAMPLE: UTF8::str_sort(' -ABC-中文空白- '); // ' ---ABC中文白空'
*
* @param string $str A UTF-8 string.
* @param bool $unique Sort unique. If true, repeated characters are ignored.
* @param bool $desc If true, will sort characters in reverse code point order.
*
* @psalm-pure
*
* @return string
* A string of sorted characters.
*/
public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
{
    $code_points = self::codepoints($str);

    if ($unique) {
        // flipping twice collapses repeated code points (values become keys and back)
        $code_points = \array_flip(\array_flip($code_points));
    }

    // order by code point value, keys are kept
    $desc ? \arsort($code_points) : \asort($code_points);

    return self::string($code_points);
}
/**
* Convert a string to an array of Unicode characters.
*
* EXAMPLE:
* UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
*
*
* @param int[]|string[] $input The string[] or int[] to split into array.
* @param int $length [optional] Max character length of each array
* element.
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the
* string.
* @param bool $try_to_use_mb_functions [optional] Set to false, if you don't want to use
* "mb_substr"
*
* @psalm-pure
*
* @return string[][]
* An array containing chunks of the input.
*/
public static function str_split_array(
    array $input,
    int $length = 1,
    bool $clean_utf8 = false,
    bool $try_to_use_mb_functions = true
): array {
    // Split every element on its own; keys and order of $input are kept.
    //
    // FIX: the previous loop iterated by reference without ever unsetting
    // the reference (and with an unused key variable). Building a fresh
    // result array avoids the dangling reference altogether.
    $chunks = [];
    foreach ($input as $key => $value) {
        $chunks[$key] = self::str_split(
            $value,
            $length,
            $clean_utf8,
            $try_to_use_mb_functions
        );
    }

    /** @var string[][] $chunks */
    return $chunks;
}
/**
* Convert a string to an array of unicode characters.
*
* EXAMPLE: UTF8::str_split('中文空白'); // array('中', '文', '空', '白')
*
* @param int|string $input The string or int to split into array.
* @param int $length [optional] Max character length of each array
* element.
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the
* string.
* @param bool $try_to_use_mb_functions [optional] Set to false, if you don't want to use
* "mb_substr"
*
* @psalm-pure
*
* @return string[]
* An array containing chunks of chars from the input.
*
* @noinspection SuspiciousBinaryOperationInspection
* @noinspection OffsetOperationsInspection
*/
public static function str_split(
$input,
int $length = 1,
bool $clean_utf8 = false,
bool $try_to_use_mb_functions = true
): array {
// a non-positive chunk length can not produce any chunk
if ($length <= 0) {
return [];
}
// this is only an old fallback
/** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
/** @var int|int[]|string|string[] $input */
$input = $input;
// array input is delegated element by element to "str_split_array()"
if (\is_array($input)) {
/**
* @psalm-suppress InvalidReturnStatement
*/
return self::str_split_array(
$input,
$length,
$clean_utf8,
$try_to_use_mb_functions
);
}
// init
$input = (string) $input;
if ($input === '') {
return [];
}
if ($clean_utf8) {
$input = self::clean($input);
}
// Three ways to get the list of single characters ($ret):
// 1) mbstring, 2) PCRE with UTF-8 support, 3) manual byte parsing.
if (
$try_to_use_mb_functions
&&
self::$SUPPORT['mbstring'] === true
) {
// "mb_str_split" already chunks by $length, so its result is returned
// directly and the chunking at the end of this method is skipped
if (\function_exists('mb_str_split')) {
/**
* @psalm-suppress ImpureFunctionCall - why?
*/
$return = \mb_str_split($input, $length);
if ($return !== false) {
return $return;
}
}
$i_max = \mb_strlen($input);
// up to 127 characters: extract char by char via "mb_substr",
// longer input: collect all characters with a single regex pass
if ($i_max <= 127) {
$ret = [];
for ($i = 0; $i < $i_max; ++$i) {
$ret[] = \mb_substr($input, $i, 1);
}
} else {
$return_array = [];
\preg_match_all('/./us', $input, $return_array);
$ret = $return_array[0] ?? [];
}
} elseif (self::$SUPPORT['pcre_utf8'] === true) {
// "." with the "u" + "s" modifiers matches one UTF-8 character, newlines included
$return_array = [];
\preg_match_all('/./us', $input, $return_array);
$ret = $return_array[0] ?? [];
} else {
// fallback
// Walk the bytes and group them by the UTF-8 lead byte pattern.
// Bytes that fit none of the patterns below (or whose continuation
// bytes are invalid) are not added to $ret.
$ret = [];
$len = \strlen($input);
/** @noinspection ForeachInvariantsInspection */
for ($i = 0; $i < $len; ++$i) {
// 0xxxxxxx: single byte character
if (($input[$i] & "\x80") === "\x00") {
$ret[] = $input[$i];
} elseif (
isset($input[$i + 1])
&&
($input[$i] & "\xE0") === "\xC0"
) {
// 110xxxxx: lead byte of a 2-byte sequence, needs one 10xxxxxx continuation byte
if (($input[$i + 1] & "\xC0") === "\x80") {
$ret[] = $input[$i] . $input[$i + 1];
++$i;
}
} elseif (
isset($input[$i + 2])
&&
($input[$i] & "\xF0") === "\xE0"
) {
// 1110xxxx: lead byte of a 3-byte sequence, needs two continuation bytes
if (
($input[$i + 1] & "\xC0") === "\x80"
&&
($input[$i + 2] & "\xC0") === "\x80"
) {
$ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];
$i += 2;
}
} elseif (
isset($input[$i + 3])
&&
($input[$i] & "\xF8") === "\xF0"
) {
// 11110xxx: lead byte of a 4-byte sequence, needs three continuation bytes
if (
($input[$i + 1] & "\xC0") === "\x80"
&&
($input[$i + 2] & "\xC0") === "\x80"
&&
($input[$i + 3] & "\xC0") === "\x80"
) {
$ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];
$i += 3;
}
}
}
}
// group the single characters into strings of (at most) $length characters
if ($length > 1) {
$ret = \array_chunk($ret, $length);
return \array_map(
static function (array $item): string {
return \implode('', $item);
},
$ret
);
}
// a result that starts with an empty string is reported as "no characters"
if (isset($ret[0]) && $ret[0] === '') {
return [];
}
return $ret;
}
/**
* Splits the string with the provided regular expression, returning an
* array of strings. An optional integer $limit will truncate the
* results.
*
* @param string $str
* @param string $pattern The regex with which to split the string.
* @param int $limit [optional] Maximum number of results to return. Default: -1 === no limit
*
* @psalm-pure
*
* @return string[]
* An array of strings.
*/
public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
{
    if ($limit === 0) {
        return [];
    }

    // nothing to split by: the whole string is the only result
    if ($pattern === '') {
        return [$str];
    }

    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $parts = \mb_split($pattern, $str);

        // FIX: "mb_split" reports failure (e.g. an invalid pattern) with false,
        // which used to be iterated / returned from this array-typed method.
        // Now handled like the "preg_split" failure below.
        if ($parts === false) {
            return [];
        }

        // a positive limit truncates the list, the remaining parts are dropped
        return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
    }

    // Ask "preg_split" for one extra chunk: it collects the unsplit rest of
    // the string and is removed again below, so the result is truncated too.
    if ($limit > 0) {
        ++$limit;
    } else {
        $limit = -1;
    }

    // NOTE(review): the pattern is passed through "preg_quote" here, so this
    // fallback matches it literally, while the mbstring branch above treats
    // it as a regex - confirm which behavior is intended.
    $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
    if ($array === false) {
        return [];
    }

    if ($limit > 0 && \count($array) === $limit) {
        \array_pop($array);
    }

    return $array;
}
/**
* Check if the string starts with the given substring.
*
* EXAMPLE:
* UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
* UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
*
*
* @param string $haystack The string to search in.
* @param string $needle The substring to search for.
*
* @psalm-pure
*
* @return bool
*/
public static function str_starts_with(string $haystack, string $needle): bool
{
    // every string (the empty one included) starts with the empty string
    if ($needle === '') {
        return true;
    }

    // an empty haystack can not start with a non-empty needle
    if ($haystack === '') {
        return false;
    }

    /** @phpstan-ignore-next-line - native function only exists in PHP8 */
    return \PHP_VERSION_ID >= 80000
        ? \str_starts_with($haystack, $needle)
        : \strncmp($haystack, $needle, \strlen($needle)) === 0;
}
/**
* Returns true if the string begins with any of $substrings, false otherwise.
*
* - case-sensitive
*
* @param string $str The input string.
* @param array $substrings Substrings to look for.
*
* @psalm-pure
*
* @return bool
* Whether or not $str starts with $substring.
*/
public static function str_starts_with_any(string $str, array $substrings): bool
{
    if ($str === '') {
        return false;
    }

    if ($substrings === []) {
        return false;
    }

    // FIX: iterate by value - the candidates are only read, the former
    // by-reference loop left a dangling reference for no benefit.
    foreach ($substrings as $substring) {
        if (self::str_starts_with($str, $substring)) {
            return true;
        }
    }

    return false;
}
/**
* Gets the substring after the first occurrence of a separator.
*
* @param string $str The input string.
* @param string $separator The string separator.
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        $offset = \mb_strpos($str, $separator);
        if ($offset === false) {
            return '';
        }

        // skip the separator itself
        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    // FIX: use the class' own "substr" wrapper like the sibling
    // "str_substr_*_separator" methods do, instead of handing the raw
    // $encoding to "\mb_substr" while position and length were computed
    // through the "self::" wrappers.
    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
/**
* Gets the substring after the last occurrence of a separator.
*
* @param string $str The input string.
* @param string $separator The string separator.
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_substr_after_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    // position of the last separator, via native mbstring for plain UTF-8
    $position = $is_utf8
        ? \mb_strrpos($str, $separator)
        : self::strrpos($str, $separator, 0, $encoding);

    if ($position === false) {
        return '';
    }

    // everything behind the separator
    if ($is_utf8) {
        return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
    }

    return (string) self::substr(
        $str,
        $position + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
/**
* Gets the substring before the first occurrence of a separator.
*
* @param string $str The input string.
* @param string $separator The string separator.
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_substr_before_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    // position of the first separator, via native mbstring for plain UTF-8
    $position = $is_utf8
        ? \mb_strpos($str, $separator)
        : self::strpos($str, $separator, 0, $encoding);

    if ($position === false) {
        return '';
    }

    // everything in front of the separator
    if ($is_utf8) {
        return (string) \mb_substr($str, 0, $position);
    }

    return (string) self::substr($str, 0, $position, $encoding);
}
/**
* Gets the substring before the last occurrence of a separator.
*
* @param string $str The input string.
* @param string $separator The string separator.
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    // position of the last separator, via native mbstring for plain UTF-8
    $position = $is_utf8
        ? \mb_strrpos($str, $separator)
        : self::strrpos($str, $separator, 0, $encoding);

    if ($position === false) {
        return '';
    }

    if ($is_utf8) {
        return (string) \mb_substr($str, 0, $position);
    }

    // the encoding name is only normalized for the final "substr" call
    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr($str, 0, $position, $encoding);
}
/**
* Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
*
* @param string $str The input string.
* @param string $needle The string to look for.
* @param bool $before_needle [optional] Default: false
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_substr_first(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    // "mb_strstr" defaults its third argument to false, so the flag can
    // simply be forwarded in both cases
    $part = $encoding === 'UTF-8'
        ? \mb_strstr($str, $needle, $before_needle)
        : self::strstr($str, $needle, $before_needle, $encoding);

    // needle not found => empty string instead of false
    if ($part === false) {
        return '';
    }

    return $part;
}
/**
* Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
*
* @param string $str The input string.
* @param string $needle The string to look for.
* @param bool $before_needle [optional] Default: false
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
*/
public static function str_substr_last(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    // "mb_strrchr" defaults its third argument to false, so the flag can
    // simply be forwarded in both cases
    $part = $encoding === 'UTF-8'
        ? \mb_strrchr($str, $needle, $before_needle)
        : self::strrchr($str, $needle, $before_needle, $encoding);

    // needle not found => empty string instead of false
    if ($part === false) {
        return '';
    }

    return $part;
}
/**
* Surrounds $str with the given substring.
*
* @param string $str
* @param string $substring The substring to add to both sides.
*
* @psalm-pure
*
* @return string
* A string with the substring both prepended and appended.
*/
public static function str_surround(string $str, string $substring): string
{
    // the same substring goes in front of and behind the input
    return "{$substring}{$str}{$substring}";
}
/**
* Returns a trimmed string with the first letter of each word capitalized.
* Also accepts an array, $ignore, allowing you to list words not to be
* capitalized.
*
* @param string $str
* @param array|string[]|null $ignore [optional] An array of words not to capitalize or
* null. Default: null
* @param string $encoding [optional] Default: 'UTF-8'
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the
* string.
* @param string|null $lang [optional] Set the language for special cases: az,
* el, lt, tr
* @param bool $try_to_keep_the_string_length [optional] true === try to keep the string length:
* e.g. ẞ -> ß
* @param bool $use_trim_first [optional] true === trim the input string,
* first
* @param string|null $word_define_chars [optional] A string of chars that will be used as
* whitespace separator === words.
*
* @psalm-pure
*
* @return string
* The titleized string.
*
* @noinspection PhpTooManyParametersInspection
*/
public static function str_titleize(
    string $str,
    array $ignore = null,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false,
    bool $use_trim_first = true,
    string $word_define_chars = null
): string {
    if ($str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($use_trim_first) {
        $str = \trim($str);
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // the plain "mb_*" functions are only used when no language specific
    // or length preserving case handling was requested
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

    // FIX: compare explicitly instead of relying on truthiness - the string
    // "0" is falsy in PHP, so "0" as a word separator was silently ignored.
    if ($word_define_chars !== null && $word_define_chars !== '') {
        $word_define_chars = \preg_quote($word_define_chars, '/');
    } else {
        $word_define_chars = '';
    }

    // a "word" is every run of chars that is neither whitespace nor one of
    // the additional separator chars
    $str = (string) \preg_replace_callback(
        '/([^\\s' . $word_define_chars . ']+)/u',
        static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
            // ignored words are kept exactly as they are
            if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                return $match[0];
            }

            if ($use_mb_functions) {
                if ($encoding === 'UTF-8') {
                    return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                           . \mb_strtolower(\mb_substr($match[0], 1));
                }

                return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                       . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
            }

            return self::ucfirst(
                self::strtolower(
                    $match[0],
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                ),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        },
        $str
    );

    return $str;
}
/**
* Convert a string into an obfuscated string.
*
* EXAMPLE:
*
* UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
*
*
* @param string $str
* @param float $percent
* @param string $obfuscateChar
* @param string[] $keepChars
*
* @psalm-pure
*
* @return string
* The obfuscate string.
*/
public static function str_obfuscate(
    string $str,
    float $percent = 0.5,
    string $obfuscateChar = '*',
    array $keepChars = []
): string {
    // Occurrences of the obfuscation char that are already part of the input
    // are swapped for a placeholder first and restored at the very end.
    $obfuscateCharHelper = "\u{2603}";
    $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

    $chars = self::chars($str);
    $charsMax = \count($chars);

    // FIX: never try to process more chars than there are. For a $percent
    // above 1.0 the counter below could not reach the target once every char
    // was marked as done, so the "while" loop never terminated.
    $charsMaxChange = \min((float) $charsMax, \round($charsMax * $percent));

    $charsCounter = 0;
    $charKeyDone = [];

    // Keep sweeping over the chars, picking roughly every second not yet
    // processed one at random, until enough chars have been processed.
    while ($charsCounter < $charsMaxChange) {
        foreach ($chars as $charKey => $char) {
            if (isset($charKeyDone[$charKey])) {
                continue;
            }

            if (\random_int(0, 100) > 50) {
                continue;
            }

            if ($char === $obfuscateChar) {
                continue;
            }

            ++$charsCounter;
            $charKeyDone[$charKey] = true;

            if ($charsCounter > $charsMaxChange) {
                break;
            }

            // protected chars count as processed, but stay readable
            if (\in_array($char, $keepChars, true)) {
                continue;
            }

            $chars[$charKey] = $obfuscateChar;
        }
    }

    $str = \implode('', $chars);

    return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
}
/**
* Returns a trimmed string in proper title case.
*
* Also accepts an array, $ignore, allowing you to list words not to be
* capitalized.
*
* Adapted from John Gruber's script.
*
* @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
*
* @param string $str
* @param array $ignore An array of words not to capitalize.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
* The titleized string.
*/
public static function str_titleize_for_humans(
string $str,
array $ignore = [],
string $encoding = 'UTF-8'
): string {
if ($str === '') {
return '';
}
$small_words = [
'(? In-Flight
$str = (string) \preg_replace_callback(
'~\\b
(? "Stand-In" (Stand is already capped at this point)
$str = (string) \preg_replace_callback(
'~\\b
(?UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'
*
* @param string $str The input string.
*
* @psalm-pure
*
* @return false|string
* false on error
*/
public static function str_to_binary(string $str)
{
    /** @var array|false $value - needed for PhpStan (stubs error) */
    $value = \unpack('H*', $str);
    if ($value === false) {
        return false;
    }

    // FIX: "base_convert" works on floats internally and silently loses
    // precision for large numbers, so longer inputs produced wrong bits.
    // Converting every hex digit to its 4 bits is exact for any length.
    /** @noinspection OffsetOperationsInspection */
    $hex = (string) $value[1];
    $hexLength = \strlen($hex);

    $bits = '';
    for ($i = 0; $i < $hexLength; ++$i) {
        $bits .= \str_pad(\decbin((int) \hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
    }

    // same output format as before: no leading zeros, "0" for a zero value
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
}
/**
* @param string $str
* @param bool $remove_empty_values Remove empty values.
* @param int|null $remove_short_values The min. string length or null to disable
*
* @psalm-pure
*
* @return string[]
*/
public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
{
    if ($str === '') {
        return $remove_empty_values ? [] : [''];
    }

    // FIX: split on exactly one line break ("\r\n", "\r" or "\n"). The old
    // pattern "[\r\n]{1,2}" also consumed two consecutive identical breaks
    // as one, so blank lines ("a\n\nb") vanished from the result.
    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $return = \mb_split("\r\n|\r|\n", $str);
    } else {
        $return = \preg_split("/\r\n|\r|\n/u", $str);
    }

    if ($return === false) {
        return $remove_empty_values ? [] : [''];
    }

    // no filtering requested: return the lines as they are
    if (
        $remove_short_values === null
        &&
        !$remove_empty_values
    ) {
        return $return;
    }

    return self::reduce_string_array(
        $return,
        $remove_empty_values,
        $remove_short_values
    );
}
/**
* Convert a string into an array of words.
*
* EXAMPLE: UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')
*
* @param string $str
* @param string $char_list Additional chars for the definition of "words".
* @param bool $remove_empty_values Remove empty values.
* @param int|null $remove_short_values The min. string length or null to disable
*
* @psalm-pure
*
* @return string[]
*/
public static function str_to_words(
    string $str,
    string $char_list = '',
    bool $remove_empty_values = false,
    int $remove_short_values = null
): array {
    if ($str === '') {
        return $remove_empty_values ? [] : [''];
    }

    $char_list = self::rxClass($char_list, '\pL');

    // The words are the captured delimiters of the split, so the result
    // alternates between "text between words" and "word".
    $return = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
    if ($return === false) {
        return $remove_empty_values ? [] : [''];
    }

    // no filtering requested: return the parts as they are
    if (
        $remove_short_values === null
        &&
        !$remove_empty_values
    ) {
        return $return;
    }

    $tmp_return = self::reduce_string_array(
        $return,
        $remove_empty_values,
        $remove_short_values
    );

    // FIX: cast by value - the former by-reference loop never unset its
    // reference. "array_map" keeps the keys of the filtered array.
    return \array_map(
        static function ($item): string {
            return (string) $item;
        },
        $tmp_return
    );
}
/**
* alias for "UTF8::to_ascii()"
*
* @param string $str
* @param string $unknown
* @param bool $strict
*
* @psalm-pure
*
* @return string
*
* @see UTF8::to_ascii()
* @deprecated please use "UTF8::to_ascii()"
*/
public static function str_transliterate(
    string $str,
    string $unknown = '?',
    bool $strict = false
): string {
    // deprecated alias: all arguments are forwarded unchanged to "to_ascii()"
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
}
/**
* Truncates the string to a given length. If $substring is provided, and
* truncating occurs, the string is further truncated so that the substring
* may be appended without exceeding the desired length.
*
* @param string $str
* @param int $length Desired length of the truncated string.
* @param string $substring [optional] The substring to append if it can fit. Default: ''
* @param string $encoding [optional] Default: 'UTF-8'
*
* @psalm-pure
*
* @return string
* A string after truncating.
*/
public static function str_truncate(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    // only the exact default encoding uses the native "mb_*" functions directly
    $is_utf8 = $encoding === 'UTF-8';
    if (!$is_utf8) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $char_count = $is_utf8
        ? (int) \mb_strlen($str)
        : (int) self::strlen($str, $encoding);

    // short enough already: nothing is cut and nothing is appended
    if ($length >= $char_count) {
        return $str;
    }

    // reserve room for the substring that gets appended
    if ($substring !== '') {
        $length -= $is_utf8
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding);
    }

    $head = $is_utf8
        ? \mb_substr($str, 0, $length)
        : self::substr($str, 0, $length, $encoding);

    return (string) $head . $substring;
}
/**
* Truncates the string to a given length, while ensuring that it does not
* split words. If $substring is provided, and truncating occurs, the
* string is further truncated so that the substring may be appended without
* exceeding the desired length.
*
* @param string $str
* @param int $length Desired length of the truncated string.
* @param string $substring [optional] The substring to append if it can fit.
* Default:
* ''
* @param string $encoding [optional] Default: 'UTF-8'
* @param bool $ignore_do_not_split_words_for_one_word [optional] Default: false
*
* @psalm-pure
*
* @return string
* A string after truncating.
*/
public static function str_truncate_safe(
string $str,
int $length,
string $substring = '',
string $encoding = 'UTF-8',
bool $ignore_do_not_split_words_for_one_word = false
): string {
// nothing of the input can be kept: only the substring is returned
if ($str === '' || $length <= 0) {
return $substring;
}
// Both branches below run the same steps, once with the native "mb_*"
// functions (exact 'UTF-8' encoding) and once with the "self::" wrappers.
if ($encoding === 'UTF-8') {
// short enough already: returned unchanged, without the substring
if ($length >= (int) \mb_strlen($str)) {
return $str;
}
// need to further trim the string so we can append the substring
$length -= (int) \mb_strlen($substring);
if ($length <= 0) {
return $substring;
}
/** @var false|string $truncated - needed for PhpStan (stubs error) */
$truncated = \mb_substr($str, 0, $length);
if ($truncated === false) {
return '';
}
// if the last word was truncated
// ($space_position is the first space at or after the last kept char;
// only if it sits exactly at index $length did the cut hit a word end)
$space_position = \mb_strpos($str, ' ', $length - 1);
if ($space_position !== $length) {
// find pos of the last occurrence of a space, get up to that
$last_position = \mb_strrpos($truncated, ' ', 0);
// Cut back to the last space inside the truncated part. If there is
// none but the input has a later space, "(int) false" is 0 and the
// whole truncated part is dropped - unless the flag disables that.
if (
$last_position !== false
||
(
$space_position !== false
&&
!$ignore_do_not_split_words_for_one_word
)
) {
$truncated = (string) \mb_substr($truncated, 0, (int) $last_position);
}
}
} else {
$encoding = self::normalize_encoding($encoding, 'UTF-8');
// short enough already: returned unchanged, without the substring
if ($length >= (int) self::strlen($str, $encoding)) {
return $str;
}
// need to further trim the string so we can append the substring
$length -= (int) self::strlen($substring, $encoding);
if ($length <= 0) {
return $substring;
}
$truncated = self::substr($str, 0, $length, $encoding);
if ($truncated === false) {
return '';
}
// if the last word was truncated
$space_position = self::strpos($str, ' ', $length - 1, $encoding);
if ($space_position !== $length) {
// find pos of the last occurrence of a space, get up to that
$last_position = self::strrpos($truncated, ' ', 0, $encoding);
// same rule as in the UTF-8 branch above
if (
$last_position !== false
||
(
$space_position !== false
&&
!$ignore_do_not_split_words_for_one_word
)
) {
$truncated = (string) self::substr($truncated, 0, (int) $last_position, $encoding);
}
}
}
return $truncated . $substring;
}
/**
* Returns a lowercase and trimmed string separated by underscores.
* Underscores are inserted before uppercase characters (with the exception
* of the first character of the string), and in place of spaces as well as
* dashes.
*
* @param string $str
*
* @psalm-pure
*
* @return string
* The underscored string.
*/
public static function str_underscored(string $str): string
{
    // "underscored" is "str_delimit()" with a fixed "_" delimiter
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
}
/**
* Returns an UpperCamelCase version of the supplied string. It trims
* surrounding spaces, capitalizes letters following digits, spaces, dashes
* and underscores, and removes spaces, dashes, underscores.
*
* @param string $str The input string.
* @param string $encoding [optional] Default: 'UTF-8'
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
* @param string|null $lang [optional] Set the language for special cases: az, el, lt,
* tr
* @param bool $try_to_keep_the_string_length [optional] true === try to keep the string length: e.g. ẞ
* -> ß
*
* @psalm-pure
*
* @return string
* A string in UpperCamelCase.
*/
public static function str_upper_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // step 1: camelize (only the encoding is forwarded to this step)
    $camelized = self::str_camelize($str, $encoding);

    // step 2: upper-case the first character, with all remaining options
    return self::ucfirst(
        $camelized,
        $encoding,
        $clean_utf8,
        $lang,
        $try_to_keep_the_string_length
    );
}
/**
* alias for "UTF8::ucfirst()"
*
* @param string $str
* @param string $encoding
* @param bool $clean_utf8
* @param string|null $lang
* @param bool $try_to_keep_the_string_length
*
* @psalm-pure
*
* @return string
*
* @see UTF8::ucfirst()
* @deprecated please use "UTF8::ucfirst()"
*/
public static function str_upper_first(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // deprecated alias: all arguments are forwarded unchanged to "ucfirst()"
    $result = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

    return $result;
}
/**
* Get the number of words in a specific string.
*
* EXAMPLES:
* // format: 0 -> return only word count (int)
* //
* UTF8::str_word_count('中文空白 öäü abc#c'); // 4
* UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
*
* // format: 1 -> return words (array)
* //
* UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
* UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
*
* // format: 2 -> return words with offset (array)
* //
* UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
* UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
*
*
* @param string $str The input string.
* @param int $format [optional]
* 0 => return a number of words (default)
* 1 => return an array of words
* 2 => return an array of words with word-offset as key
*
* @param string $char_list [optional] Additional chars that belong to words and do not start a new word.
*
* @psalm-pure
*
* @return int|string[]
* The number of words in the string.
*/
public static function str_word_count(string $str, int $format = 0, string $char_list = '')
{
    // "str_to_words()" returns separators and words alternating,
    // the words sit at the odd indexes
    $parts = self::str_to_words($str, $char_list);
    $part_count = \count($parts);

    // every other format value: only the number of words
    if ($format !== 1 && $format !== 2) {
        return (int) (($part_count - 1) / 2);
    }

    $words = [];

    // format 1: plain list of words
    if ($format === 1) {
        for ($idx = 1; $idx < $part_count; $idx += 2) {
            $words[] = $parts[$idx];
        }

        return $words;
    }

    // format 2: words keyed by their character offset, which advances by
    // the length of each word plus the separator that follows it
    $position = (int) self::strlen($parts[0]);
    for ($idx = 1; $idx < $part_count; $idx += 2) {
        $words[$position] = $parts[$idx];
        $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
    }

    return $words;
}
/**
* Case-insensitive string comparison.
*
* INFO: Case-insensitive version of UTF8::strcmp()
*
* EXAMPLE: UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0
*
* @param string $str1 The first string.
* @param string $str2 The second string.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return int
* < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2,
* 0 if they are equal
*/
public static function strcasecmp(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): int {
    // case-fold both operands with identical options, then compare them
    // case-sensitively
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
}
/**
* alias for "UTF8::strstr()"
*
* @param string $haystack
* @param string $needle
* @param bool $before_needle
* @param string $encoding
* @param bool $clean_utf8
*
* @psalm-pure
*
* @return false|string
*
* @see UTF8::strstr()
* @deprecated please use "UTF8::strstr()"
*/
public static function strchr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // deprecated alias: all arguments are forwarded unchanged to "strstr()"
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

    return $result;
}
/**
* Case-sensitive string comparison.
*
* EXAMPLE: UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0
*
* @param string $str1 The first string.
* @param string $str2 The second string.
*
* @psalm-pure
*
* @return int
* < 0 if str1 is less than str2
* > 0 if str1 is greater than str2
* 0 if they are equal
*/
public static function strcmp(string $str1, string $str2): int
{
    // identical bytes: no normalization needed
    if ($str1 === $str2) {
        return 0;
    }

    // compare the canonically decomposed (NFD) forms, so that precomposed
    // and decomposed spellings of the same character are treated as equal
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // FIX: "normalize" does not return a string when it fails (e.g. invalid
    // UTF-8). That value used to be passed straight to "\strcmp", making two
    // different broken strings compare as equal (or raising a TypeError
    // under strict types). Fall back to the raw input instead.
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
}
/**
* Find length of initial segment not matching mask.
*
* @param string $str
* @param string $char_list
* @param int $offset
* @param int|null $length
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return int
*/
public static function strcspn(
    string $str,
    string $char_list,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8'
): int {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // an empty mask can not match: the whole string is the segment
    if ($char_list === '') {
        return (int) self::strlen($str, $encoding);
    }

    // restrict the search to the requested part of the string first
    $needs_slice = $offset || $length !== null;
    if ($needs_slice) {
        if ($encoding !== 'UTF-8') {
            $slice = self::substr($str, $offset, $length, $encoding);
        } elseif ($length === null) {
            $slice = \mb_substr($str, $offset);
        } else {
            $slice = \mb_substr($str, $offset, $length);
        }

        if ($slice === false) {
            return 0;
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        $str = $slice;
    }

    if ($str === '') {
        return 0;
    }

    // lazily capture everything in front of the first char from the mask
    $found = [];
    if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
        // no char of the mask occurs at all
        return (int) self::strlen($str, $encoding);
    }

    $prefix_length = self::strlen($found[1], $encoding);

    return $prefix_length === false ? 0 : $prefix_length;
}
/**
* alias for "UTF8::stristr()"
*
* @param string $haystack
* @param string $needle
* @param bool $before_needle
* @param string $encoding
* @param bool $clean_utf8
*
* @psalm-pure
*
* @return false|string
*
* @see UTF8::stristr()
* @deprecated please use "UTF8::stristr()"
*/
public static function strichr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // deprecated alias: all arguments are forwarded unchanged to "stristr()"
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

    return $result;
}
/**
* Create a UTF-8 string from code points.
*
* INFO: opposite to UTF8::codepoints()
*
* EXAMPLE: UTF8::string(array(246, 228, 252)); // 'öäü'
*
* @param int|int[]|string|string[] $intOrHex Integer or Hexadecimal codepoints.
*
* @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
*
* @psalm-pure
*
* @return string
* A UTF-8 encoded string.
*/
public static function string($intOrHex): string
{
    if ($intOrHex === []) {
        return '';
    }

    // a single code point is handled like a list with one entry
    if (!\is_array($intOrHex)) {
        $intOrHex = [$intOrHex];
    }

    // Build one numeric character reference ("&#NNN;") per code point and
    // let the entity decoder turn them into the actual characters.
    //
    // FIX: the "&#" prefix was missing, so the decoder received plain
    // "246;228;252;" and returned it unchanged instead of "öäü".
    $str = '';
    foreach ($intOrHex as $strPart) {
        $str .= '&#' . (int) $strPart . ';';
    }

    return self::html_entity_decode($str, \ENT_QUOTES | \ENT_HTML5);
}
/**
* Checks if string starts with "BOM" (Byte Order Mark Character) character.
*
* EXAMPLE: UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true
*
* @param string $str The input string.
*
* @psalm-pure
*
* @return bool
*
* true if the string has BOM at the start,
* false otherwise
*
*/
public static function string_has_bom(string $str): bool
{
    // Each key of self::$BOM is compared against the start of $str, using the
    // associated value as the number of bytes to compare.
    //
    // The table is only read here, so it is iterated by value: binding the
    // values by reference would needlessly turn the entries of the shared
    // static array into references.
    foreach (self::$BOM as $bom_string => $bom_byte_length) {
        if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
            return true;
        }
    }

    return false;
}
/**
* Strip HTML and PHP tags from a string + clean invalid UTF-8.
*
* EXAMPLE: UTF8::strip_tags("κόσμε\xa0\xa1"); // 'κόσμε'
*
* @see http://php.net/manual/en/function.strip-tags.php
*
* @param string $str
* The input string.
*
* @param string|null $allowable_tags [optional]
* You can use the optional second parameter to specify tags which should
* not be stripped.
*
*
* HTML comments and PHP tags are also stripped. This is hardcoded and
* can not be changed with allowable_tags.
*
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return string
* The stripped string.
*/
public static function strip_tags(
    string $str,
    string $allowable_tags = null,
    bool $clean_utf8 = false
): string {
    // Nothing to strip from an empty string.
    if ($str === '') {
        return '';
    }

    // Optionally clean the input before the tags are removed.
    $input = $clean_utf8 ? self::clean($str) : $str;

    // Only hand the allow-list to the native function when one was given.
    return $allowable_tags === null
        ? \strip_tags($input)
        : \strip_tags($input, $allowable_tags);
}
/**
* Strip all whitespace characters. This includes tabs and newline
* characters, as well as multibyte whitespace such as the thin space
* and ideographic space.
*
* EXAMPLE: UTF8::strip_whitespace(' Ο συγγραφέας '); // 'Οσυγγραφέας'
*
* @param string $str
*
* @psalm-pure
*
* @return string
*/
public static function strip_whitespace(string $str): string
{
    if ($str !== '') {
        // Remove every run of whitespace; the cast turns a null result
        // (regex failure) into an empty string.
        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        return (string) $stripped;
    }

    return '';
}
/**
* Find the position of the first occurrence of a substring in a string, case-insensitive.
*
* INFO: use UTF8::stripos_in_byte() for the byte-length
*
* EXAMPLE: UTF8::stripos('aσσb', 'ΣΣ'); // 1 (σσ == ΣΣ)
*
* @see http://php.net/manual/en/function.mb-stripos.php
*
* @param string $haystack The string from which to get the position of the first occurrence of needle.
* @param string $needle The string to find in haystack.
* @param int $offset [optional] The position in haystack to start searching.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|int
* Return the (int) numeric position of the first occurrence of needle in the
* haystack string, or false if needle is not found.
*/
public static function stripos(
    string $haystack,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $is_php8 = \PHP_VERSION_ID >= 80000;

    // Empty haystack: only PHP >= 8 reports an empty needle as found (at 0).
    if ($haystack === '') {
        return ($is_php8 && $needle === '') ? 0 : false;
    }

    // Before PHP 8 an empty needle is never found.
    if (!$is_php8 && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // invalid characters in front of the needle make "mb_strpos()" and
        // "iconv_strpos()" report a wrong position, so remove them first
        $haystack = self::clean($haystack);
        $needle = self::clean($needle);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        return \mb_stripos($haystack, $needle, $offset);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    //
    // fallback via intl
    //

    $intl_usable = $encoding === 'UTF-8' // "grapheme_stripos()" can't handle other encodings
        && $offset >= 0 // "grapheme_stripos()" can't handle a negative offset
        && self::$SUPPORT['intl'] === true;

    if ($intl_usable) {
        $grapheme_pos = \grapheme_stripos($haystack, $needle, $offset);
        if ($grapheme_pos !== false) {
            return $grapheme_pos;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both sides, then search case-sensitively
    //

    $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
}
/**
* Returns all of haystack starting from and including the first occurrence of needle to the end.
*
* EXAMPLE:
* $str = 'iñtërnâtiônàlizætiøn';
* $search = 'NÂT';
*
* UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
* UTF8::stristr($str, $search, true)); // 'iñtër'
*
*
* @param string $haystack The input string. Must be valid UTF-8.
* @param string $needle The string to look for. Must be valid UTF-8.
* @param bool $before_needle [optional]
* If TRUE, it returns the part of the
* haystack before the first occurrence of the needle (excluding the needle).
*
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|string
* A sub-string, or false if needle is not found.
*/
public static function stristr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $is_php8 = \PHP_VERSION_ID >= 80000;

    // Empty haystack: only PHP >= 8 treats an empty needle as a match.
    if ($haystack === '') {
        return ($is_php8 && $needle === '') ? '' : false;
    }

    if ($clean_utf8) {
        // invalid characters in front of the needle make "mb_strpos()" and
        // "iconv_strpos()" report a wrong position, so remove them first
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // Empty needle: PHP >= 8 returns the whole haystack, older versions fail.
    if ($needle === '') {
        return $is_php8 ? $haystack : false;
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        return \mb_stristr($haystack, $needle, $before_needle);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl ("grapheme_stristr()" can't handle other encodings)
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
        $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
        if ($grapheme_result !== false) {
            return $grapheme_result;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($needle . $haystack)) {
        return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the first
    // case-insensitive occurrence of the needle
    //

    $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/usi';
    \preg_match($pattern, $haystack, $parts);
    if (!isset($parts[1])) {
        return false;
    }

    $prefix = $parts[1];
    if ($before_needle) {
        return $prefix;
    }

    // the match starts right after the prefix, measured in characters
    $start = (int) self::strlen($prefix, $encoding);

    return self::substr($haystack, $start, null, $encoding);
}
/**
* Get the string length, not the byte-length!
*
* INFO: use UTF8::strwidth() for the char-length
*
* EXAMPLE: UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20
*
* @see http://php.net/manual/en/function.mb-strlen.php
*
* @param string $str The string being checked for length.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|int
*
* The number (int) of characters in the string $str having character encoding
* $encoding.
* (One multi-byte character counted as +1).
*
* Can return false, if e.g. mbstring is not installed and we process invalid
* chars.
*
*/
public static function strlen(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($str === '') {
        return 0;
    }

    // "UTF-8" and "CP850" are used as given, anything else gets normalized.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strlen" and "\iconv_strlen" return a wrong length
        // if invalid characters are found in $str
        $str = self::clean($str);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
        return $encoding === 'UTF-8'
            ? @\mb_strlen($str)
            : @\mb_strlen($str, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
        return \strlen($str);
    }

    $no_converter_available = self::$SUPPORT['mbstring'] === false
        && self::$SUPPORT['iconv'] === false;

    if ($encoding !== 'UTF-8' && $no_converter_available) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
        $iconv_length = \iconv_strlen($str, $encoding);
        if ($iconv_length !== false) {
            return $iconv_length;
        }
    }

    //
    // fallback via intl ("grapheme_strlen()" can't handle other encodings)
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
        $grapheme_length = \grapheme_strlen($str);
        if ($grapheme_length !== null) {
            return $grapheme_length;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return \strlen($str);
    }

    //
    // fallback via vanilla php: count the characters matched by the regex
    //

    \preg_match_all('/./us', $str, $chars);
    $char_count = \count($chars[0]);

    // a non-empty string with no matched character is reported as a failure
    return $char_count === 0 ? false : $char_count;
}
/**
* Get string length in byte.
*
* @param string $str
*
* @psalm-pure
*
* @return int
*/
public static function strlen_in_byte(string $str): int
{
    if ($str === '') {
        return 0;
    }

    // "mb_" is available if the function overload is used, so use it
    // with an 8-bit encoding; otherwise the native function is used.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
}
/**
* Case-insensitive string comparisons using a "natural order" algorithm.
*
* INFO: natural order version of UTF8::strcasecmp()
*
* EXAMPLES:
* UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
* UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
*
* UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
* UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
*
*
* @param string $str1 The first string.
* @param string $str2 The second string.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return int
* < 0 if str1 is less than str2
* > 0 if str1 is greater than str2
* 0 if they are equal
*/
public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
{
    // Case-fold both operands, then delegate to the case-sensitive variant.
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
}
/**
* String comparisons using a "natural order" algorithm
*
* INFO: natural order version of UTF8::strcmp()
*
* EXAMPLES:
* UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
* UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
*
* UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
* UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
*
*
* @see http://php.net/manual/en/function.strnatcmp.php
*
* @param string $str1 The first string.
* @param string $str2 The second string.
*
* @psalm-pure
*
* @return int
* < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2;
* 0 if they are equal
*/
public static function strnatcmp(string $str1, string $str2): int
{
    if ($str1 !== $str2) {
        // Fold both operands before handing them to the native comparison.
        $left = (string) self::strtonatfold($str1);
        $right = (string) self::strtonatfold($str2);

        return \strnatcmp($left, $right);
    }

    // Identical strings need no folding.
    return 0;
}
/**
* Case-insensitive string comparison of the first n characters.
*
* EXAMPLE:
* UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
*
*
* @see http://php.net/manual/en/function.strncasecmp.php
*
* @param string $str1 The first string.
* @param string $str2 The second string.
* @param int $len The length of strings to be used in the comparison.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return int
* < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2;
* 0 if they are equal
*/
public static function strncasecmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    // Case-fold both operands, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
}
/**
* String comparison of the first n characters.
*
* EXAMPLE:
* UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
*
*
* @see http://php.net/manual/en/function.strncmp.php
*
* @param string $str1 The first string.
* @param string $str2 The second string.
* @param int $len Number of characters to use in the comparison.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return int
* < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2;
* 0 if they are equal
*/
public static function strncmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    // "UTF-8" and "CP850" are used as given, anything else gets normalized.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Cut both operands down to their first $len characters ...
    $is_utf8 = $encoding === 'UTF-8';

    $head1 = $is_utf8
        ? \mb_substr($str1, 0, $len)
        : self::substr($str1, 0, $len, $encoding);

    $head2 = $is_utf8
        ? \mb_substr($str2, 0, $len)
        : self::substr($str2, 0, $len, $encoding);

    // ... and compare what is left.
    return self::strcmp((string) $head1, (string) $head2);
}
/**
* Search a string for any of a set of characters.
*
* EXAMPLE: UTF8::strpbrk('-中文空白-', '白'); // '白-'
*
* @see http://php.net/manual/en/function.strpbrk.php
*
* @param string $haystack The string where char_list is looked for.
* @param string $char_list This parameter is case-sensitive.
*
* @psalm-pure
*
* @return false|string
* The string starting from the character found, or false if it is not found.
*/
public static function strpbrk(string $haystack, string $char_list)
{
    if ($haystack === '' || $char_list === '') {
        return false;
    }

    // Match the first character of the haystack that is part of $char_list.
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $hit)) {
        return false;
    }

    // Return the haystack from the first byte occurrence of the matched character.
    $byte_pos = (int) \strpos($haystack, $hit[0]);

    return \substr($haystack, $byte_pos);
}
/**
* Find the position of the first occurrence of a substring in a string.
*
* INFO: use UTF8::strpos_in_byte() for the byte-length
*
* EXAMPLE: UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8
*
* @see http://php.net/manual/en/function.mb-strpos.php
*
* @param string $haystack The string from which to get the position of the first occurrence of needle.
* @param int|string $needle The string to find in haystack, or a code point as int.
* @param int $offset [optional] The search offset. If it is not specified, 0 is used.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|int
* The (int) numeric position of the first occurrence of needle in the haystack
* string. If needle is not found, it returns false.
*/
public static function strpos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // Early exit for an empty haystack, before the needle is converted:
    // PHP >= 8 finds an empty needle at position 0, older versions never match.
    if ($haystack === '') {
        if (\PHP_VERSION_ID >= 80000) {
            if ($needle === '') {
                return 0;
            }
        } else {
            return false;
        }
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    // Same empty-haystack rule again, now that the needle is a string.
    if ($haystack === '') {
        if (\PHP_VERSION_ID >= 80000 && $needle === '') {
            return 0;
        }

        return false;
    }

    if ($needle === '' && \PHP_VERSION_ID < 80000) {
        return false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset);
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
        return @\mb_strpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
        return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
        // ignore invalid negative offset to keep compatibility
        // with php < 5.5.35, < 5.6.21, < 7.0.6
        $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
        return @\strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // A negative offset counts from the end of the haystack. Translate it into
    // an absolute, zero-based character position first, so that the position
    // returned below is relative to the start of the full haystack and not to
    // the sliced-off tail.
    if ($offset < 0) {
        $offset += (int) self::strlen($haystack, $encoding);
        if ($offset < 0) {
            // the offset reaches back past the start: search the whole haystack
            $offset = 0;
        }
    }

    // Search only in the part of the haystack that starts at $offset.
    $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
    if ($haystack_tmp === false) {
        $haystack_tmp = '';
    }
    $haystack = (string) $haystack_tmp;

    // Byte position of the needle inside the sliced haystack.
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
        return false;
    }

    if ($pos) {
        // convert the byte position into a character position and shift it
        // by the number of characters that were skipped
        return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
    }

    return $offset;
}
/**
* Find the position of the first occurrence of a substring in a string.
*
* @param string $haystack
* The string being checked.
*
* @param string $needle
* The string to find in haystack.
*
* @param int $offset [optional]
* The search offset. If it is not specified, 0 is used.
*
*
* @psalm-pure
*
* @return false|int
* The numeric position of the first occurrence of needle in the
* haystack string. If needle is not found, it returns false.
*/
public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    // An empty haystack or needle never matches here.
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "mb_" is available if the function overload is used, so use it
    // with an 8-bit encoding; otherwise the native function is used.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
}
/**
* Find the position of the first occurrence of a substring in a string, case-insensitive.
*
* @param string $haystack
* The string being checked.
*
* @param string $needle
* The string to find in haystack.
*
* @param int $offset [optional]
* The search offset. If it is not specified, 0 is used.
*
*
* @psalm-pure
*
* @return false|int
* The numeric position of the first occurrence of needle in the
* haystack string. If needle is not found, it returns false.
*/
public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    // An empty haystack or needle never matches here.
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "mb_" is available if the function overload is used, so use it
    // with an 8-bit encoding; otherwise the native function is used.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_stripos($haystack, $needle, $offset, 'CP850')
        : \stripos($haystack, $needle, $offset);
}
/**
* Find the last occurrence of a character in a string within another.
*
* EXAMPLE: UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'
*
* @see http://php.net/manual/en/function.mb-strrchr.php
*
* @param string $haystack The string from which to get the last occurrence of needle.
* @param string $needle The string to find in haystack
* @param bool $before_needle [optional]
* Determines which portion of haystack
* this function returns.
* If set to true, it returns all of haystack
* from the beginning to the last occurrence of needle.
* If set to false, it returns all of haystack
* from the last occurrence of needle to the end,
*
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|string
* The portion of haystack or false if needle is not found.
*/
public static function strrchr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "UTF-8" and "CP850" are used as given, anything else gets normalized.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrchr($haystack, $needle, $before_needle)
            : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    $is_binary_or_ascii = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($is_binary_or_ascii && !$before_needle) {
        return \strrchr($haystack, $needle);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via iconv or vanilla php: both only look for the
    // first character of the needle
    //

    $first_char = self::substr($needle, 0, 1, $encoding);
    if ($first_char === false) {
        return false;
    }
    $needle = (string) $first_char;

    if (self::$SUPPORT['iconv'] === true) {
        $pos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
        $pos = self::strrpos($haystack, $needle, 0, $encoding);
    }

    if ($pos === false) {
        return false;
    }

    // Either the part in front of the last occurrence, or the part starting at it.
    return $before_needle
        ? self::substr($haystack, 0, $pos, $encoding)
        : self::substr($haystack, $pos, null, $encoding);
}
/**
* Reverses characters order in the string.
*
* EXAMPLE: UTF8::strrev('κ-öäü'); // 'üäö-κ'
*
* @param string $str The input string.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string
* The string with characters in the reverse sequence.
*/
public static function strrev(string $str, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    // Emoji are encoded before the string is taken apart and decoded again
    // once the reversed string has been assembled.
    $str = self::emoji_encode($str, true);
    $reversed = '';

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // walk from the last character to the first one
        for ($i = (int) self::strlen($str, $encoding) - 1; $i >= 0; --$i) {
            $char = self::substr($str, $i, 1, $encoding);
            if ($char !== false) {
                $reversed .= $char;
            }
        }
    } elseif (self::$SUPPORT['intl'] === true) {
        // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
        for ($i = (int) \grapheme_strlen($str) - 1; $i >= 0; --$i) {
            $char = \grapheme_substr($str, $i, 1);
            if ($char !== false) {
                $reversed .= $char;
            }
        }
    } else {
        for ($i = (int) \mb_strlen($str) - 1; $i >= 0; --$i) {
            $char = \mb_substr($str, $i, 1);
            if ($char !== false) {
                $reversed .= $char;
            }
        }
    }

    return self::emoji_decode($reversed, true);
}
/**
* Find the last occurrence of a character in a string within another, case-insensitive.
*
* EXAMPLE: UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'
*
* @see http://php.net/manual/en/function.mb-strrichr.php
*
* @param string $haystack The string from which to get the last occurrence of needle.
* @param string $needle The string to find in haystack.
* @param bool $before_needle [optional]
* Determines which portion of haystack
* this function returns.
* If set to true, it returns all of haystack
* from the beginning to the last occurrence of needle.
* If set to false, it returns all of haystack
* from the last occurrence of needle to the end,
*
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|string
* The portion of haystack, or false if needle is not found.
*/
public static function strrichr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "UTF-8" and "CP850" are used as given, anything else gets normalized.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrichr($haystack, $needle, $before_needle)
            : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via vanilla php: only the first character of the needle is searched
    //

    $first_char = self::substr($needle, 0, 1, $encoding);
    if ($first_char === false) {
        return false;
    }
    $needle = (string) $first_char;

    $pos = self::strripos($haystack, $needle, 0, $encoding);
    if ($pos === false) {
        return false;
    }

    // Either the part in front of the last occurrence, or the part starting at it.
    return $before_needle
        ? self::substr($haystack, 0, $pos, $encoding)
        : self::substr($haystack, $pos, null, $encoding);
}
/**
* Find the position of the last occurrence of a substring in a string, case-insensitive.
*
* EXAMPLE: UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13
*
* @param string $haystack The string to look in.
* @param int|string $needle The string to look for.
* @param int $offset [optional] Number of characters to ignore in the beginning or end.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|int
* The (int) numeric position of the last occurrence of needle in the haystack
* string. If needle is not found, it returns false.
*/
public static function strripos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $is_php8 = \PHP_VERSION_ID >= 80000;

    // Early exit for an empty haystack, before the needle is converted:
    // PHP >= 8 finds an empty needle at position 0, older versions never match.
    if ($haystack === '') {
        if (!$is_php8) {
            return false;
        }
        if ($needle === '') {
            return 0;
        }
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    // Same empty-haystack rule again, now that the needle is a string.
    if ($haystack === '') {
        return ($is_php8 && $needle === '') ? 0 : false;
    }

    if (!$is_php8 && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // "UTF-8" and "CP850" are used as given, anything else gets normalized.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strripos($haystack, $needle, $offset)
            : \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strripos($haystack, $needle, $offset);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    $intl_usable = $encoding === 'UTF-8' // "grapheme_strripos()" can't handle other encodings
        && $offset >= 0 // "grapheme_strripos()" can't handle a negative offset
        && self::$SUPPORT['intl'] === true;

    if ($intl_usable) {
        $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
        if ($grapheme_pos !== false) {
            return $grapheme_pos;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both sides, then search case-sensitively
    //

    $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
    $folded_needle = self::strtocasefold($needle, true, false, $encoding);

    return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
}
/**
* Finds position of last occurrence of a string within another, case-insensitive.
*
* @param string $haystack
* The string from which to get the position of the last occurrence
* of needle.
*
* @param string $needle
* The string to find in haystack.
*
* @param int $offset [optional]
* The position in haystack
* to start searching.
*
*
* @psalm-pure
*
* @return false|int
* Return the numeric position of the last occurrence of needle in the
* haystack string, or false if needle is not found.
*/
public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    // An empty haystack or needle never matches here.
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "mb_" is available if the function overload is used, so use it
    // with an 8-bit encoding; otherwise the native function is used.
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
}
/**
* Find the position of the last occurrence of a substring in a string.
*
* EXAMPLE: UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13
*
* @see http://php.net/manual/en/function.mb-strrpos.php
*
* @param string $haystack The string being checked, for the last occurrence of needle
* @param int|string $needle The string to find in haystack, or a code point as int.
* @param int $offset [optional] May be specified to begin searching an arbitrary number of characters
* into the string. Negative values will stop searching at an arbitrary point prior to
* the end of the string.
*
* @param string $encoding [optional] Set the charset.
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|int
* The (int) numeric position of the last occurrence of needle in the haystack
* string. If needle is not found, it returns false.
*/
public static function strrpos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $is_php8 = \PHP_VERSION_ID >= 80000;

    // Early exit for an empty haystack, before the needle is converted:
    // PHP >= 8 finds an empty needle at position 0, older versions never match.
    if ($haystack === '') {
        if (!$is_php8) {
            return false;
        }
        if ($needle === '') {
            return 0;
        }
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    // Same empty-haystack rule again, now that the needle is a string.
    if ($haystack === '') {
        return ($is_php8 && $needle === '') ? 0 : false;
    }

    if (!$is_php8 && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // "UTF-8" and "CP850" are used as given, anything else gets normalized.
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrpos($haystack, $needle, $offset)
            : \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strrpos($haystack, $needle, $offset);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    $intl_usable = $offset >= 0 // "grapheme_strrpos()" can't handle a negative offset
        && $encoding === 'UTF-8' // "grapheme_strrpos()" can't handle other encodings
        && self::$SUPPORT['intl'] === true;

    if ($intl_usable) {
        $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
        if ($grapheme_pos !== false) {
            return $grapheme_pos;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strrpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //
    // NOTE(review): the substr() / strlen() calls below do not pass $encoding,
    // so they run with the default encoding of those helpers - confirm this is intended.
    //

    // A positive offset skips leading characters, a negative offset cuts
    // characters off the end; an offset of 0 searches the haystack as is.
    $sliced = null;
    if ($offset > 0) {
        $sliced = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
        $sliced = self::substr($haystack, 0, $offset);
        $offset = 0;
    }

    if ($sliced !== null) {
        $haystack = $sliced === false ? '' : (string) $sliced;
    }

    // Byte position of the last occurrence inside the (sliced) haystack.
    $byte_pos = \strrpos($haystack, $needle);
    if ($byte_pos === false) {
        return false;
    }

    /** @var false|string $prefix - needed for PhpStan (stubs error) */
    $prefix = \substr($haystack, 0, $byte_pos);
    if ($prefix === false) {
        return false;
    }

    // Convert the byte position into a character position and add the skipped offset.
    return $offset + (int) self::strlen($prefix);
}
/**
* Find the position of the last occurrence of a substring in a string.
*
* @param string $haystack
* The string being checked, for the last occurrence
* of needle.
*
* @param string $needle
* The string to find in haystack.
*
* @param int $offset [optional] May be specified to begin searching an arbitrary number of characters into
* the string. Negative values will stop searching at an arbitrary point
* prior to the end of the string.
*
*
* @psalm-pure
*
* @return false|int
* The numeric position of the last occurrence of needle in the
* haystack string. If needle is not found, it returns false.
*/
public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    // nothing to search in, or nothing to search for
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // without "mbstring.func_overload" the native function is used directly
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
        return \strrpos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it with an 8-bit charset
    return \mb_strrpos($haystack, $needle, $offset, 'CP850');
}
/**
* Finds the length of the initial segment of a string consisting entirely of characters contained within a given
* mask.
*
* EXAMPLE: UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // '3'
*
* @param string $str The input string.
* @param string $mask The mask of chars
* @param int $offset [optional]
* @param int|null $length [optional]
* @param string $encoding [optional] Set the charset.
*
* @psalm-pure
*
* @return false|int
*/
public static function strspn(
    string $str,
    string $mask,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8'
) {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // reduce the input to the requested window first
    if ($offset || $length !== null) {
        if ($encoding !== 'UTF-8') {
            $str = (string) self::substr($str, $offset, $length, $encoding);
        } elseif ($length !== null) {
            $str = (string) \mb_substr($str, $offset, $length);
        } else {
            $str = (string) \mb_substr($str, $offset);
        }
    }

    if ($mask === '' || $str === '') {
        return 0;
    }

    // match the leading run of mask characters and measure it
    $leading_run = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $leading_run)) {
        return 0;
    }

    return (int) self::strlen($leading_run[0], $encoding);
}
/**
* Returns part of haystack string from the first occurrence of needle to the end of haystack.
*
* EXAMPLE:
* $str = 'iñtërnâtiônàlizætiøn';
* $search = 'nât';
*
* UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
* UTF8::strstr($str, $search, true)); // 'iñtër'
*
*
* @param string $haystack The input string. Must be valid UTF-8.
* @param string $needle The string to look for. Must be valid UTF-8.
* @param bool $before_needle [optional]
* If TRUE, strstr() returns the part of the
* haystack before the first occurrence of the needle (excluding the needle).
*
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|string
* A sub-string,
* or false if needle is not found.
*/
public static function strstr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $is_php8 = \PHP_VERSION_ID >= 80000;

    if ($haystack === '') {
        return ($is_php8 && $needle === '') ? '' : false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" return a wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // checked after the optional cleaning, which may alter the needle
    if ($needle === '') {
        return $is_php8 ? $haystack : false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // 1) mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strstr($haystack, $needle, $before_needle)
            : \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // 2) binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strstr($haystack, $needle, $before_needle);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // 3) intl
    //

    if (
        self::$SUPPORT['intl'] === true
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
    ) {
        $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
        if ($grapheme_result !== false) {
            return $grapheme_result;
        }
    }

    //
    // 4) ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strstr($haystack, $needle, $before_needle);
    }

    //
    // 5) vanilla php: lazily capture everything in front of the first needle
    //

    $found = [];
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);
    if (!isset($found[1])) {
        return false;
    }

    if ($before_needle) {
        return $found[1];
    }

    // skip the captured prefix, the rest starts with the needle
    return self::substr($haystack, (int) self::strlen($found[1]));
}
/**
* Finds first occurrence of a string within another.
*
* @param string $haystack
* The string from which to get the first occurrence
* of needle.
*
* @param string $needle
* The string to find in haystack.
*
* @param bool $before_needle [optional]
* Determines which portion of haystack
* this function returns.
* If set to true, it returns all of haystack
* from the beginning to the first occurrence of needle.
* If set to false, it returns all of haystack
* from the first occurrence of needle to the end,
*
*
* @psalm-pure
*
* @return false|string
* The portion of haystack,
* or false if needle is not found.
*/
public static function strstr_in_byte(
    string $haystack,
    string $needle,
    bool $before_needle = false
) {
    // nothing to search in, or nothing to search for
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // without "mbstring.func_overload" the native function is used directly
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
        return \strstr($haystack, $needle, $before_needle);
    }

    // "mb_" is available if overload is used, so use it with an 8-bit charset
    return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
}
/**
* Unicode transformation for case-less matching.
*
* EXAMPLE: UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'
*
* @see http://unicode.org/reports/tr21/tr21-5.html
*
* @param string $str The input string.
* @param bool $full [optional]
* true, replace full case folding chars (default)
* false, use only limited static array [UTF8::$COMMON_CASE_FOLD]
*
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
* @param string $encoding [optional] Set the charset.
* @param string|null $lang [optional] Set the language for special cases: az, el, lt, tr
* @param bool $lower [optional] Use lowercase string, otherwise use uppercase string. PS: uppercase
* is for some languages better ...
*
* @psalm-pure
*
* @return string
*/
public static function strtocasefold(
    string $str,
    bool $full = true,
    bool $clean_utf8 = false,
    string $encoding = 'UTF-8',
    string $lang = null,
    bool $lower = true
): string {
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // apply the case-folding replacements before the final case conversion
    $folded = self::fixStrCaseHelper($str, $lower, $full);

    // plain UTF-8 without a language hint: call mbstring directly
    if ($lang === null && $encoding === 'UTF-8') {
        return $lower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
    }

    // otherwise delegate to the encoding / language aware helpers
    return $lower
        ? self::strtolower($folded, $encoding, false, $lang)
        : self::strtoupper($folded, $encoding, false, $lang);
}
/**
* Make a string lowercase.
*
* EXAMPLE: UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'
*
* @see http://php.net/manual/en/function.mb-strtolower.php
*
* @param string $str The string being lowercased.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
* @param string|null $lang [optional] Set the language for special cases: az, el, lt,
* tr
* @param bool $try_to_keep_the_string_length [optional] true === try to keep the string length: e.g. ẞ
* -> ß
*
* @psalm-pure
*
* @return string
* String with all alphabetic characters converted to lowercase.
*/
public static function strtolower(
    $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    $str = (string) $str;
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // hack for old php version or for the polyfill ...
    if ($try_to_keep_the_string_length) {
        $str = self::fixStrCaseHelper($str, true);
    }

    // the common case: UTF-8 without a language hint
    if ($lang === null && $encoding === 'UTF-8') {
        return \mb_strtolower($str);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    // no language hint: always fallback via symfony polyfill
    if ($lang === null) {
        return \mb_strtolower($str, $encoding);
    }

    // a language hint needs intl, otherwise warn and fall back
    if (self::$SUPPORT['intl'] !== true) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

        return \mb_strtolower($str, $encoding);
    }

    // lazily load the list of available transliterators
    if (self::$INTL_TRANSLITERATOR_LIST === null) {
        self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
    }

    $transliterator_id = $lang . '-Lower';
    if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

        // unknown language: use the generic transliterator instead
        $transliterator_id = 'Any-Lower';
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection UnnecessaryCastingInspection */
    return (string) \transliterator_transliterate($transliterator_id, $str);
}
/**
* Make a string uppercase.
*
* EXAMPLE: UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'
*
* @see http://php.net/manual/en/function.mb-strtoupper.php
*
* @param string $str The string being uppercased.
* @param string $encoding [optional] Set the charset.
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
* @param string|null $lang [optional] Set the language for special cases: az, el, lt,
* tr
* @param bool $try_to_keep_the_string_length [optional] true === try to keep the string length: e.g. ẞ
* -> ß
*
* @psalm-pure
*
* @return string
* String with all alphabetic characters converted to uppercase.
*/
public static function strtoupper(
    $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // init
    $str = (string) $str;

    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // remove invalid UTF-8 sequences before the case conversion
        $str = self::clean($str);
    }

    // hack for old php version or for the polyfill ...
    if ($try_to_keep_the_string_length) {
        $str = self::fixStrCaseHelper($str);
    }

    // the common case: UTF-8 without a language hint
    if ($lang === null && $encoding === 'UTF-8') {
        return \mb_strtoupper($str);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($lang !== null) {
        if (self::$SUPPORT['intl'] === true) {
            // lazily load the list of available transliterators
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            $language_code = $lang . '-Upper';
            if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                // intl IS available here, only the requested language is unknown,
                // so the warning mirrors the one from strtolower()
                /**
                 * @psalm-suppress ImpureFunctionCall - it is only a warning
                 */
                \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                // unknown language: use the generic transliterator instead
                $language_code = 'Any-Upper';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \transliterator_transliterate($language_code, $str);
        }

        // the warning must name this method (it previously named strtolower())
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
}
/**
* Translate characters or replace sub-strings.
*
* EXAMPLE:
*
* $array = [
* 'Hello' => '○●◎',
* '中文空白' => 'earth',
* ];
* UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
*
*
* @see http://php.net/manual/en/function.strtr.php
*
* @param string $str The string being translated.
* @param string|string[] $from The string replacing from.
* @param string|string[] $to [optional] The string being translated to.
*
* @psalm-pure
*
* @return string
* This function returns a copy of str, translating all occurrences of each character in "from"
* to the corresponding character in "to".
*/
public static function strtr(string $str, $from, $to = ''): string
{
    if ($str === '') {
        return '';
    }

    // identical source and target: nothing would change
    if ($from === $to) {
        return $str;
    }

    if ($to !== '') {
        // strings are split via self::str_split(), arrays are used as given
        $from_list = \is_array($from) ? $from : self::str_split($from);
        $to_list = \is_array($to) ? $to : self::str_split($to);

        // cut the longer list down to the length of the shorter one,
        // surplus entries on either side are ignored
        $pair_count = \min(\count($from_list), \count($to_list));
        $from_list = \array_slice($from_list, 0, $pair_count);
        $to_list = \array_slice($to_list, 0, $pair_count);

        // the result is kept in its own variable, so that the error message
        // below can still show the real input instead of "false"
        $pairs = \array_combine($from_list, $to_list);
        if ($pairs === false) {
            throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_list, true) . ' | to: ' . \print_r($to_list, true) . ')');
        }

        return \strtr($str, $pairs);
    }

    // $to is an empty string here: a string $from is simply removed ...
    if (\is_string($from)) {
        return \str_replace($from, $to, $str);
    }

    // ... otherwise $from is handed to the native function as a replace-pairs array
    return \strtr($str, $from);
}
/**
* Return the width of a string.
*
* INFO: use UTF8::strlen() for the byte-length
*
* EXAMPLE: UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21
*
* @param string $str The input string.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return int
*/
public static function strwidth(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): int {
    if ($str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // iconv and mbstring are not tolerant to invalid encoding
        $str = self::clean($str);
    }

    //
    // 1) mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strwidth($str)
            : \mb_strwidth($str, $encoding);
    }

    //
    // 2) vanilla php: the regex below works on UTF-8 input
    //

    if ($encoding !== 'UTF-8') {
        $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip every character from the listed ranges and remember how many were removed
    $wide_count = 0;
    $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

    // removed characters count twice, every remaining character once
    return ($wide_count * 2) + (int) self::strlen($remaining);
}
/**
* Get part of a string.
*
* EXAMPLE: UTF8::substr('中文空白', 1, 2); // '文空'
*
* @see http://php.net/manual/en/function.mb-substr.php
*
* @param string $str The string being checked.
* @param int $offset The first position used in str.
* @param int|null $length [optional] The maximum length of the returned string.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|string
* The portion of str specified by the offset and
* length parameters. If str is shorter than offset
* characters long, FALSE will be returned.
*/
public static function substr(
    string $str,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // empty string
    if ($str === '' || $length === 0) {
        return '';
    }

    if ($clean_utf8) {
        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        $str = self::clean($str);
    }

    // whole string
    if (!$offset && $length === null) {
        return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring (UTF-8)
    //

    if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
        if ($length === null) {
            return \mb_substr($str, $offset);
        }

        return \mb_substr($str, $offset, $length);
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }

    //
    // fallback via mbstring (any other encoding)
    //
    // Without this branch the requested encoding was never handed to any
    // substring function: "iconv_substr()" below is called without it and
    // the vanilla fallback only understands UTF-8.
    //

    if (self::$SUPPORT['mbstring'] === true) {
        // a "null" length means "until the end of the string"
        return \mb_substr($str, $offset, $length, $encoding);
    }

    // otherwise we need the string-length
    $str_length = 0;
    if ($offset || $length === null) {
        $str_length = self::strlen($str, $encoding);
    }

    // e.g.: invalid chars + mbstring not installed
    if ($str_length === false) {
        return false;
    }

    // empty string
    if ($offset === $str_length && !$length) {
        return '';
    }

    // impossible
    if ($offset && $offset > $str_length) {
        return '';
    }

    $length = $length ?? (int) $str_length;

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_substr() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $return_tmp = \grapheme_substr($str, $offset, $length);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback via iconv
    //

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
        $return_tmp = \iconv_substr($str, $offset, $length);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return \substr($str, $offset, $length);
    }

    //
    // fallback via vanilla php
    //

    // split to array, and remove invalid characters
    $array = self::str_split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
}
/**
* Binary-safe comparison of two strings from an offset, up to a length of characters.
*
* EXAMPLE:
* UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
* UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
* UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
*
*
* @param string $str1 The main string being compared.
* @param string $str2 The secondary string being compared.
* @param int $offset [optional] The start position for the comparison. If negative, it starts
* counting from the end of the string.
* @param int|null $length [optional] The length of the comparison. The default value is the largest
* of the length of the str compared to the length of main_str less the
* offset.
* @param bool $case_insensitivity [optional] If case_insensitivity is TRUE, comparison is case
* insensitive.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return int
* < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2,
* 0 if they are equal
*/
public static function substr_compare(
    string $str1,
    string $str2,
    int $offset = 0,
    int $length = null,
    bool $case_insensitivity = false,
    string $encoding = 'UTF-8'
): int {
    // only cut the strings if a window was requested
    if ($offset !== 0 || $length !== null) {
        if ($encoding === 'UTF-8') {
            if ($length === null) {
                $str1 = (string) \mb_substr($str1, $offset);
            } else {
                $str1 = (string) \mb_substr($str1, $offset, $length);
            }

            // compare at most as many characters as remain in $str1
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);

            // the length of $str1 has to be measured in the same encoding that
            // is used to cut $str2, otherwise the two counts do not match
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
        }
    }

    if ($case_insensitivity) {
        return self::strcasecmp($str1, $str2, $encoding);
    }

    return self::strcmp($str1, $str2);
}
/**
* Count the number of substring occurrences.
*
* EXAMPLE: UTF8::substr_count('中文空白', '文空', 1, 2); // 1
*
* @see http://php.net/manual/en/function.substr-count.php
*
* @param string $haystack The string to search in.
* @param string $needle The substring to search for.
* @param int $offset [optional] The offset where to start counting.
* @param int|null $length [optional]
* The maximum length after the specified offset to search for the
* substring. It outputs a warning if the offset plus the length is
* greater than the haystack length.
*
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
* @param bool $clean_utf8 [optional] Remove non UTF-8 chars from the string.
*
* @psalm-pure
*
* @return false|int
* This function returns an integer, or false if the needle is empty or the haystack length cannot be determined.
*/
public static function substr_count(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // an empty needle is reported as an error ...
    if ($needle === '') {
        return false;
    }

    // ... while an empty haystack or a zero-length window contains nothing
    if ($haystack === '' || $length === 0) {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // invalid characters would otherwise shift the positions
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // reduce the haystack to the requested window
    if ($offset || $length > 0) {
        if ($length === null) {
            $haystack_length = self::strlen($haystack, $encoding);
            if ($haystack_length === false) {
                return false;
            }

            $length = (int) $haystack_length;
        }

        $haystack = $encoding === 'UTF-8'
            ? (string) \mb_substr($haystack, $offset, $length)
            : (string) \mb_substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_substr_count($haystack, $needle)
            : \mb_substr_count($haystack, $needle, $encoding);
    }

    // vanilla php: count the matches of the quoted needle
    $occurrences = [];
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $occurrences, \PREG_SET_ORDER);

    return \count($occurrences);
}
/**
* Count the number of substring occurrences.
*
* @param string $haystack
* The string being checked.
*
* @param string $needle
* The string being found.
*
* @param int $offset [optional]
* The offset where to start counting
*
* @param int|null $length [optional]
* The maximum length after the specified offset to search for the
* substring. It outputs a warning if the offset plus the length is
* greater than the haystack length.
*
*
* @psalm-pure
*
* @return false|int
* The number of times the
* needle substring occurs in the
* haystack string.
*/
public static function substr_count_in_byte(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null
) {
    if ($haystack === '' || $needle === '') {
        return 0;
    }

    $use_overload = self::$SUPPORT['mbstring_func_overload'] === true;

    // with "mbstring.func_overload" the window is cut manually,
    // because "mb_substr_count()" has no offset / length parameters
    if ($use_overload && ($offset || $length !== null)) {
        if ($length === null) {
            $length_tmp = self::strlen($haystack);
            if ($length_tmp === false) {
                return false;
            }

            $length = (int) $length_tmp;
        }

        // The output from "substr_count()" changed in PHP 7.1, so this guard is
        // only meant for older versions. PHP_VERSION_ID of 7.1.0 is 70100 - the
        // former value 71000 made the guard apply to every PHP 7.x release.
        if (
            $length !== 0
            &&
            $offset !== 0
            &&
            ($length + $offset) <= 0
            &&
            \PHP_VERSION_ID < 70100
        ) {
            return false;
        }

        /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
        $haystack_tmp = \substr($haystack, $offset, $length);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }

        $haystack = (string) $haystack_tmp;
    }

    if ($use_overload) {
        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // no overload: the native function handles offset and length itself
    if ($length === null) {
        return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
}
/**
* Returns the number of occurrences of $substring in the given string.
* By default, the comparison is case-sensitive, but can be made insensitive
* by setting $case_sensitive to false.
*
* @param string $str The input string.
* @param string $substring The substring to search for.
* @param bool $case_sensitive [optional] Whether or not to enforce case-sensitivity. Default: true
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return int
*/
public static function substr_count_simple(
    string $str,
    string $substring,
    bool $case_sensitive = true,
    string $encoding = 'UTF-8'
): int {
    if ($substring === '' || $str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            // case-insensitive: fold both strings to uppercase before counting
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }

    if (!$case_sensitive) {
        // case-insensitive: compare the uppercase variants
        $str = \mb_strtoupper($str);
        $substring = \mb_strtoupper($substring);
    }

    return (int) \mb_substr_count($str, $substring);
}
/**
* Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
*
* EXAMPLE:
* UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
* UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
*
*
* @param string $haystack The string to search in.
* @param string $needle The substring to search for.
*
* @psalm-pure
*
* @return string
* Return the sub-string.
*/
public static function substr_ileft(string $haystack, string $needle): string
{
    // an empty haystack or an empty needle leaves nothing to remove
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    // no case-insensitive match at the beginning: return the input untouched
    if (!self::str_istarts_with($haystack, $needle)) {
        return $haystack;
    }

    // drop as many characters as the needle is long
    $needle_length = (int) self::strlen($needle);

    return (string) \mb_substr($haystack, $needle_length);
}
/**
* Get part of a string process in bytes.
*
* @param string $str The string being checked.
* @param int $offset The first position used in str.
* @param int|null $length [optional] The maximum length of the returned string.
*
* @psalm-pure
*
* @return false|string
* The portion of str specified by the offset and
* length parameters. If str is shorter than offset
* characters long, FALSE will be returned.
*/
public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
{
    // empty string
    if ($str === '' || $length === 0) {
        return '';
    }

    // whole string
    if (!$offset && $length === null) {
        return $str;
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }

    // No length given: read until the end of the string. The former magic
    // length 2147483647 would cut off results longer than 2 GiB.
    if ($length === null) {
        return \substr($str, $offset);
    }

    return \substr($str, $offset, $length);
}
/**
* Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
*
* EXAMPLE:
* UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
* UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
*
*
* @param string $haystack The string to search in.
* @param string $needle The substring to search for.
*
* @psalm-pure
*
* @return string
* Return the sub-string.
*/
public static function substr_iright(string $haystack, string $needle): string
{
    // an empty haystack or an empty needle leaves nothing to remove
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    // no case-insensitive match at the end: return the input untouched
    if (!self::str_iends_with($haystack, $needle)) {
        return $haystack;
    }

    // keep everything except the last "length of needle" characters
    $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

    return (string) \mb_substr($haystack, 0, $keep_length);
}
/**
* Removes a prefix ($needle) from the beginning of the string ($haystack).
*
* EXAMPLE:
* UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
* UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
*
*
* @param string $haystack The string to search in.
* @param string $needle The substring to search for.
*
* @psalm-pure
*
* @return string
* Return the sub-string.
*/
public static function substr_left(string $haystack, string $needle): string
{
    // an empty haystack or an empty needle leaves nothing to remove
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    // no (case-sensitive) match at the beginning: return the input untouched
    if (!self::str_starts_with($haystack, $needle)) {
        return $haystack;
    }

    // drop as many characters as the needle is long
    $needle_length = (int) self::strlen($needle);

    return (string) \mb_substr($haystack, $needle_length);
}
/**
* Replace text within a portion of a string.
*
* EXAMPLE: UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')
*
* source: https://gist.github.com/stemar/8287074
*
* @param string|string[] $str The input string or an array of strings.
* @param string|string[] $replacement The replacement string or an array of strings.
* @param int|int[] $offset
* If start is positive, the replacing will begin at the start'th offset
* into string.
*
* If start is negative, the replacing will begin at the start'th character
* from the end of string.
*
* @param int|int[]|null $length [optional] If given and is positive, it represents the length of the
* portion of string which is to be replaced. If it is negative, it
* represents the number of characters from the end of string at which to
* stop replacing. If it is not given, then it will default to strlen(
* string ); i.e. end the replacing at the end of string. Of course, if
* length is zero then this function will have the effect of inserting
* replacement into string at the given start offset.
* @param string $encoding [optional] Set the charset for e.g. "mb_" function
*
* @psalm-pure
*
* @return string|string[]
* The result string is returned. If string is an array then array is returned.
*/
public static function substr_replace(
    $str,
    $replacement,
    $offset,
    $length = null,
    string $encoding = 'UTF-8'
) {
    //
    // array input: normalize all arguments to lists and process item by item
    //

    if (\is_array($str)) {
        $num = \count($str);

        // nothing to process; without this guard the padding below
        // would produce a result with one element
        if ($num === 0) {
            return [];
        }

        // the replacement
        if (\is_array($replacement)) {
            $replacement = \array_slice($replacement, 0, $num);
        } else {
            $replacement = \array_pad([$replacement], $num, $replacement);
        }

        // the offset
        if (\is_array($offset)) {
            $offset = \array_slice($offset, 0, $num);
            foreach ($offset as &$value_tmp) {
                // non-integer offsets are replaced by 0
                $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
            }
            unset($value_tmp);
        } else {
            $offset = \array_pad([$offset], $num, $offset);
        }

        // the length
        if ($length === null) {
            // for array input a missing length means "insert" (length 0)
            $length = \array_fill(0, $num, 0);
        } elseif (\is_array($length)) {
            $length = \array_slice($length, 0, $num);
            foreach ($length as &$value_tmp_V2) {
                // non-integer lengths are replaced by the number of input strings
                $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
            }
            unset($value_tmp_V2);
        } else {
            $length = \array_pad([$length], $num, $length);
        }

        // recursive call, the closure forwards the requested encoding,
        // which a plain callable would silently drop
        return \array_map(
            static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                return self::substr_replace($str_item, $replacement_item, $offset_item, $length_item, $encoding);
            },
            $str,
            $replacement,
            $offset,
            $length
        );
    }

    //
    // string input
    //

    // only the first element of a replacement array is used
    if (\is_array($replacement)) {
        if ($replacement !== []) {
            $replacement = $replacement[0];
        } else {
            $replacement = '';
        }
    }

    // init
    $str = (string) $str;
    $replacement = (string) $replacement;

    if (\is_array($length)) {
        throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
    }

    if (\is_array($offset)) {
        throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
    }

    if ($str === '') {
        return $replacement;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $string_length = (int) self::strlen($str, $encoding);

        // clamp the offset into [0, $string_length]
        if ($offset < 0) {
            $offset = (int) \max(0, $string_length + $offset);
        } elseif ($offset > $string_length) {
            $offset = $string_length;
        }

        // a negative length counts from the end, a missing one means "until the end"
        if ($length !== null && $length < 0) {
            $length = (int) \max(0, $string_length - $offset + $length);
        } elseif ($length === null || $length > $string_length) {
            $length = $string_length;
        }

        // the replaced part must not reach beyond the end of the string
        /** @noinspection AdditionOperationOnArraysInspection */
        if (($offset + $length) > $string_length) {
            $length = $string_length - $offset;
        }

        // head + replacement + tail
        /** @noinspection AdditionOperationOnArraysInspection */
        return ((string) \mb_substr($str, 0, $offset, $encoding)) .
               $replacement .
               ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return ($length === null) ?
            \substr_replace($str, $replacement, $offset) :
            \substr_replace($str, $replacement, $offset, $length);
    }

    //
    // fallback via vanilla php: split both strings into characters and splice
    //

    \preg_match_all('/./us', $str, $str_matches);
    \preg_match_all('/./us', $replacement, $replacement_matches);

    if ($length === null) {
        $length_tmp = self::strlen($str, $encoding);
        if ($length_tmp === false) {
            // e.g.: non mbstring support + invalid chars
            return '';
        }

        $length = (int) $length_tmp;
    }

    \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

    return \implode('', $str_matches[0]);
}
/**
 * Strips $needle from the end of $haystack, if $haystack ends with it.
 *
 * The suffix check is a byte-wise, case-sensitive comparison; when it does
 * not match, the haystack is returned unchanged.
 *
 * EXAMPLE:
 * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
 * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
 *
 * @param string $haystack The string to search in.
 * @param string $needle   The suffix to remove.
 * @param string $encoding [optional] Set the charset for e.g. "mb_" function
 *
 * @psalm-pure
 *
 * @return string
 *                The haystack without the suffix, or the unchanged haystack.
 */
public static function substr_right(
    string $haystack,
    string $needle,
    string $encoding = 'UTF-8'
): string {
    if ($haystack === '') {
        return '';
    }

    // Nothing to strip: empty needle, or the haystack does not end with it.
    if ($needle === '' || \substr($haystack, -\strlen($needle)) !== $needle) {
        return $haystack;
    }

    if ($encoding === 'UTF-8') {
        $chars_to_keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

        return (string) \mb_substr($haystack, 0, $chars_to_keep);
    }

    $chars_to_keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

    return (string) self::substr($haystack, 0, $chars_to_keep, $encoding);
}
/**
 * Returns a case swapped version of the string.
 *
 * EXAMPLE: UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'
 *
 * The fast path XORs the lower-cased, upper-cased and original string
 * byte by byte. That only works while both case mappings keep the byte
 * length of the input (e.g. "ı" is two bytes, its upper-case "I" is one);
 * for UTF-8 input where that does not hold, the case is swapped
 * character by character instead.
 *
 * @param string $str        The input string.
 * @param string $encoding   [optional] Set the charset for e.g. "mb_" function
 * @param bool   $clean_utf8 [optional] Remove non UTF-8 chars from the string.
 *
 * @psalm-pure
 *
 * @return string
 *                Each character's case swapped.
 */
public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
{
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    if ($encoding === 'UTF-8') {
        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);
        $byte_length = \strlen($str);

        if (\strlen($lower) !== $byte_length || \strlen($upper) !== $byte_length) {
            // A case mapping changed the byte length, so the byte-wise XOR
            // below would be misaligned and truncated: swap per character.
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars !== false) {
                $swapped = '';
                foreach ($chars as $char) {
                    $char_lower = \mb_strtolower($char);
                    // already lower-case (or caseless) -> upper, otherwise -> lower
                    $swapped .= $char_lower === $char ? \mb_strtoupper($char) : $char_lower;
                }

                return $swapped;
            }
            // preg_split() failed (e.g. invalid UTF-8): keep the previous behaviour
        }

        return (string) ($lower ^ $upper ^ $str);
    }

    return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
}
/**
 * Checks whether symfony-polyfills are used.
 *
 * A polyfill is assumed when the "mbstring" or "iconv" extension is not
 * loaded but one of its functions ("mb_strlen" / "iconv") exists anyway.
 *
 * @psalm-pure
 *
 * @return bool
 *              true if in use, false otherwise
 *
 * @internal Please do not use it anymore, we will make it private in the next major version.
 */
public static function symfony_polyfill_used(): bool
{
    $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
    $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

    return $mbstring_is_polyfilled || $iconv_is_polyfilled;
}
/**
 * Replaces every tab character in the string with $tab_length spaces.
 *
 * @param string $str        The input string.
 * @param int    $tab_length [optional] Number of spaces that replace one tab.
 *
 * @psalm-pure
 *
 * @return string
 *                The string with all tabs expanded.
 */
public static function tabs_to_spaces(string $str, int $tab_length = 4): string
{
    // Always derive the run of spaces from $tab_length, so that every width
    // (including the common 4 and 2) yields exactly that many spaces.
    $spaces = \str_repeat(' ', $tab_length);

    return \str_replace("\t", $spaces, $str);
}
/**
 * Converts the first character of each word in the string to uppercase
 * and all other chars to lowercase.
 *
 * Without a language and without the "keep length" flag the work is done by
 * "mb_convert_case()"; otherwise it is delegated to "UTF8::str_titleize()".
 *
 * @param string      $str                           The input string.
 * @param string      $encoding                      [optional] Set the charset for e.g. "mb_" function
 * @param bool        $clean_utf8                    [optional] Remove non UTF-8 chars from the string.
 * @param string|null $lang                          [optional] Set the language for special cases: az, el, lt,
 *                                                   tr
 * @param bool        $try_to_keep_the_string_length [optional] true === try to keep the string length: e.g. ẞ
 *                                                   -> ß
 *
 * @psalm-pure
 *
 * @return string
 *                A string with all characters of $str being title-cased.
 */
public static function titlecase(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // language specific rules or length preservation requested
    if ($lang !== null || $try_to_keep_the_string_length) {
        return self::str_titleize(
            $str,
            null,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length,
            false
        );
    }

    if ($encoding === 'UTF-8') {
        return \mb_convert_case($str, \MB_CASE_TITLE);
    }

    $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

    return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
}
/**
 * Deprecated alias that forwards all arguments to "UTF8::to_ascii()".
 *
 * @param string $str       The input string.
 * @param string $subst_chr Passed on as the second argument of "to_ascii()".
 * @param bool   $strict    Passed on as the third argument of "to_ascii()".
 *
 * @psalm-pure
 *
 * @return string
 *
 * @see        UTF8::to_ascii()
 * @deprecated please use "UTF8::to_ascii()"
 */
public static function toAscii(
    string $str,
    string $subst_chr = '?',
    bool $strict = false
): string {
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
}
/**
 * Deprecated alias that forwards to "UTF8::to_iso8859()".
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @see        UTF8::to_iso8859()
 * @deprecated please use "UTF8::to_iso8859()"
 */
public static function toIso8859($str)
{
    $converted = self::to_iso8859($str);

    return $converted;
}
/**
 * Deprecated alias; the call is forwarded to "UTF8::to_iso8859()".
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @see        UTF8::to_iso8859()
 * @deprecated please use "UTF8::to_iso8859()"
 */
public static function toLatin1($str)
{
    $converted = self::to_iso8859($str);

    return $converted;
}
/**
 * Deprecated alias that forwards to "UTF8::to_utf8()".
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @see        UTF8::to_utf8()
 * @deprecated please use "UTF8::to_utf8()"
 */
public static function toUTF8($str)
{
    $converted = self::to_utf8($str);

    return $converted;
}
/**
 * Convert a string into ASCII.
 *
 * Thin wrapper: all arguments are handed to "ASCII::to_transliterate()".
 *
 * EXAMPLE: UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'
 *
 * @param string $str     The input string.
 * @param string $unknown [optional] Character use if character unknown. (default is ?)
 * @param bool   $strict  [optional] Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
 *                        performance
 *
 * @psalm-pure
 *
 * @return string
 */
public static function to_ascii(
    string $str,
    string $unknown = '?',
    bool $strict = false
): string {
    $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

    return $transliterated;
}
/**
 * Interprets a scalar value as a boolean.
 *
 * The value is cast to string first. Rules, in this order:
 * - an empty string is false
 * - "true", "1", "on", "yes" (any letter case) are true
 * - "false", "0", "off", "no" (any letter case) are false
 * - any other numeric string is true only if it is greater than zero
 * - everything else is true, unless it consists of whitespace only
 *
 * Info: http://php.net/manual/en/filter.filters.validate.php
 *
 * @param bool|int|float|string $str
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function to_boolean($str): bool
{
    $value = (string) $str;
    if ($value === '') {
        return false;
    }

    // all keywords are lower-case, so one lower-cased lookup covers the
    // exact match as well as the case-insensitive match
    $keyword = \strtolower($value);

    if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
        return true;
    }

    if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
        return false;
    }

    if (\is_numeric($value)) {
        return (float) $value > 0;
    }

    return \trim($value) !== '' && \trim($value) !== '0';
}
/**
 * Convert given string to safe filename (and keep string case).
 *
 * Thin wrapper: all arguments are handed to "ASCII::to_filename()".
 *
 * @param string $str
 * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
 *                                  simply replaced with hyphen.
 * @param string $fallback_char
 *
 * @psalm-pure
 *
 * @return string
 */
public static function to_filename(
    string $str,
    bool $use_transliterate = false,
    string $fallback_char = '-'
): string {
    $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

    return $filename;
}
/**
 * Convert a string into "ISO-8859"-encoding (Latin-1).
 *
 * Arrays are converted element by element (keys are kept); a single value
 * is cast to string and passed to "UTF8::utf8_decode()".
 *
 * EXAMPLE: UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 */
public static function to_iso8859($str)
{
    if (\is_array($str)) {
        foreach ($str as $key => $value) {
            $str[$key] = self::to_iso8859($value);
        }

        return $str;
    }

    $str = (string) $str;

    return $str === '' ? '' : self::utf8_decode($str);
}
/**
 * Deprecated alias that forwards to "UTF8::to_iso8859()".
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @see        UTF8::to_iso8859()
 * @deprecated please use "UTF8::to_iso8859()"
 */
public static function to_latin1($str)
{
    $converted = self::to_iso8859($str);

    return $converted;
}
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * Every string (a single one, or each element of an array) is run through
 * "UTF8::to_utf8_string()":
 *
 * - It decode UTF-8 codepoints and Unicode escape sequences.
 * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.
 * - WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
 *   case.
 *
 * EXAMPLE: UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')
 *
 * @param string|string[] $str                        Any string or array of strings.
 * @param bool            $decode_html_entity_to_utf8 Set to true, if you need to decode html-entities.
 *
 * @psalm-pure
 *
 * @return string|string[]
 *                         The UTF-8 encoded string
 *
 * @template TToUtf8
 * @phpstan-param TToUtf8 $str
 * @phpstan-return TToUtf8
 *
 * @noinspection SuspiciousBinaryOperationInspection
 */
public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
{
    if (!\is_array($str)) {
        /** @phpstan-var TToUtf8 $str */
        $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $str;
    }

    foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
    }

    return $str;
}
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * How it works:
 * - The string is scanned byte by byte. A lead byte (>= 0xC0) that is followed by the
 *   expected number of continuation bytes (0x80-0xBF) is copied unchanged as a 2-, 3- or
 *   4-byte sequence; any other byte >= 0x80 is handed to "to_utf8_convert_helper()".
 * - Afterwards "\uXXXX" escape sequences (including UTF-16 surrogate pairs) that appear
 *   literally in the text are replaced by the character they denote.
 * - Optionally html-entities are decoded via "UTF8::html_entity_decode()".
 *
 * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.
 * - WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
 *   case.
 *
 * EXAMPLE: UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'
 *
 * @param string $str                        Any string.
 * @param bool   $decode_html_entity_to_utf8 Set to true, if you need to decode html-entities.
 *
 * @psalm-pure
 *
 * @return string
 *                The UTF-8 encoded string; an empty string if the escape-sequence
 *                replacement fails ("preg_replace_callback()" returned null).
 *
 * @noinspection SuspiciousBinaryOperationInspection
 */
public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
{
if ($str === '') {
return $str;
}
// byte length of the input; the loop below works on single bytes
$max = \strlen($str);
$buf = '';
for ($i = 0; $i < $max; ++$i) {
$c1 = $str[$i];
if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
if ($c1 <= "\xDF") { // looks like 2 bytes UTF8
// a missing follow-up byte at the end of the string is treated as "\x00"
$c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
$buf .= $c1 . $c2;
++$i;
} else { // not valid UTF8 - convert it
$buf .= self::to_utf8_convert_helper($c1);
}
} elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8
$c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
$c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
$buf .= $c1 . $c2 . $c3;
$i += 2;
} else { // not valid UTF8 - convert it
$buf .= self::to_utf8_convert_helper($c1);
}
} elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8
$c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
$c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
$c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
$buf .= $c1 . $c2 . $c3 . $c4;
$i += 3;
} else { // not valid UTF8 - convert it
$buf .= self::to_utf8_convert_helper($c1);
}
} else { // doesn't look like UTF8, but should be converted
$buf .= self::to_utf8_convert_helper($c1);
}
} elseif (($c1 & "\xC0") === "\x80") { // needs conversion
// a continuation byte (10xxxxxx) that does not follow a lead byte
$buf .= self::to_utf8_convert_helper($c1);
} else { // it doesn't need conversion
$buf .= $c1;
}
}
// decode unicode escape sequences + unicode surrogate pairs
// (groups 1 + 2: high and low surrogate of a pair, group 3: a single "\uXXXX" escape)
$buf = \preg_replace_callback(
'/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
/**
 * Maps one matched escape sequence (or surrogate pair) to its character.
 *
 * @param array $matches
 *
 * @psalm-pure
 *
 * @return string
 */
static function (array $matches): string {
if (isset($matches[3])) {
$cp = (int) \hexdec($matches[3]);
} else {
// combine the surrogate pair into one code point
// http://unicode.org/faq/utf_bom.html#utf16-4
$cp = ((int) \hexdec($matches[1]) << 10)
+ (int) \hexdec($matches[2])
+ 0x10000
- (0xD800 << 10)
- 0xDC00;
}
// https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
//
// php_utf32_utf8(unsigned char *buf, unsigned k)
if ($cp < 0x80) {
return (string) self::chr($cp);
}
// code points 0x80-0x9F are assembled by hand as a two-byte sequence
if ($cp < 0xA0) {
/** @noinspection UnnecessaryCastingInspection */
return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
}
return self::decimal_to_chr($cp);
},
$buf
);
// null signals a PCRE error
if ($buf === null) {
return '';
}
// decode UTF-8 codepoints
if ($decode_html_entity_to_utf8) {
$buf = self::html_entity_decode($buf);
}
return $buf;
}
/**
 * Returns the given string as an integer, or null if the string isn't numeric.
 *
 * "Numeric" is decided by "is_numeric()"; the conversion is a plain
 * integer cast, so a fractional part is cut off.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return int|null
 *                  null if the string isn't numeric
 */
public static function to_int(string $str)
{
    return \is_numeric($str) ? (int) $str : null;
}
/**
 * Returns the given input as string, or null if the input isn't int|float|string
 * and do not implement the "__toString()" method.
 *
 * @param float|int|object|string|null $input
 *
 * @psalm-pure
 *
 * @return string|null
 *                     null if the input isn't int|float|string and has no "__toString()" method
 */
public static function to_string($input)
{
    if ($input === null) {
        return null;
    }

    $type = \gettype($input);

    // plain scalars that can be cast directly
    if (\in_array($type, ['string', 'integer', 'float', 'double'], true)) {
        return (string) $input;
    }

    // objects only when they know how to render themselves
    if ($type === 'object' && \method_exists($input, '__toString')) {
        return (string) $input;
    }

    return null;
}
/**
 * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
 *
 * INFO: This is slower then "trim()"
 *
 * We can only use the original-function, if we use <= 7-Bit in the string / chars
 * but the check for ASCII (7-Bit) cost more time, then we can safe here.
 *
 * EXAMPLE: UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'
 *
 * @param string      $str   The string to be trimmed
 * @param string|null $chars [optional] Optional characters to be stripped; null strips whitespace,
 *                           an empty string strips nothing.
 *
 * @psalm-pure
 *
 * @return string
 *                The trimmed string.
 */
public static function trim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // An empty character list would build the invalid character class "[]";
    // there is nothing to strip, so the input is returned as it is.
    if ($chars === '') {
        return $str;
    }

    $use_mbstring = self::$SUPPORT['mbstring'] === true;

    if ($chars !== null) {
        // the non-mbstring fallback quotes "/" as well
        /** @noinspection PregQuoteUsageInspection */
        $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
        // "{$chars}" instead of "${chars}": the latter is deprecated as of PHP 8.2
        $pattern = "^[{$chars}]+|[{$chars}]+\$";
    } else {
        $pattern = '^[\\s]+|[\\s]+$';
    }

    if ($use_mbstring) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    return self::regex_replace($str, $pattern, '');
}
/**
 * Makes string's first char uppercase.
 *
 * EXAMPLE: UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'
 *
 * The first character is upper-cased with "mb_strtoupper()" unless a
 * language or the "keep length" flag is given; then "UTF8::strtoupper()"
 * is used. The rest of the string is left untouched.
 *
 * @param string      $str                           The input string.
 * @param string      $encoding                      [optional] Set the charset for e.g. "mb_" function
 * @param bool        $clean_utf8                    [optional] Remove non UTF-8 chars from the string.
 * @param string|null $lang                          [optional] Set the language for special cases: az, el, lt,
 *                                                   tr
 * @param bool        $try_to_keep_the_string_length [optional] true === try to keep the string length: e.g. ẞ
 *                                                   -> ß
 *
 * @psalm-pure
 *
 * @return string
 *                The resulting string with the first char in uppercase.
 */
public static function ucfirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    $plain_mb_is_enough = $lang === null && !$try_to_keep_the_string_length;

    if ($encoding === 'UTF-8') {
        $first_char = (string) \mb_substr($str, 0, 1);
        $remainder = (string) \mb_substr($str, 1);

        $first_char_upper = $plain_mb_is_enough
            ? \mb_strtoupper($first_char)
            : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

        return $first_char_upper . $remainder;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');
    $remainder = (string) self::substr($str, 1, null, $encoding);

    if ($plain_mb_is_enough) {
        $first_char = (string) \mb_substr($str, 0, 1, $encoding);
        $first_char_upper = \mb_strtoupper($first_char, $encoding);
    } else {
        $first_char = (string) self::substr($str, 0, 1, $encoding);
        $first_char_upper = self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );
    }

    return $first_char_upper . $remainder;
}
/**
 * Deprecated alias that forwards its arguments to "UTF8::ucfirst()".
 *
 * @param string $str
 * @param string $encoding
 * @param bool   $clean_utf8
 *
 * @psalm-pure
 *
 * @return string
 *
 * @see        UTF8::ucfirst()
 * @deprecated please use "UTF8::ucfirst()"
 */
public static function ucword(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): string {
    $result = self::ucfirst($str, $encoding, $clean_utf8);

    return $result;
}
/**
 * Uppercase for all words in the string.
 *
 * EXAMPLE: UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'
 *
 * Pure ASCII input without exceptions and without an extra char list is
 * handled by the native "ucwords()". Otherwise the string is split via
 * "UTF8::str_to_words()" and every segment that is not listed in
 * $exceptions is passed through "UTF8::ucfirst()".
 *
 * @param string   $str        The input string.
 * @param string[] $exceptions [optional] Exclusion for some words.
 * @param string   $char_list  [optional] Additional chars that contains to words and do not start a new
 *                             word.
 * @param string   $encoding   [optional] Set the charset.
 * @param bool     $clean_utf8 [optional] Remove non UTF-8 chars from the string.
 *
 * @psalm-pure
 *
 * @return string
 */
public static function ucwords(
    string $str,
    array $exceptions = [],
    string $char_list = '',
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): string {
    // strict comparison: the string "0" is valid input and must not be
    // treated like an empty string
    if ($str === '') {
        return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // the native function is only an option when neither a char list nor
    // any (non-empty) exception is given; compared strictly, so that a
    // char list of "0" is not ignored
    $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

    if (
        $use_php_default_functions
        &&
        ASCII::is_ascii($str)
    ) {
        return \ucwords($str);
    }

    $words = self::str_to_words($str, $char_list);
    $use_exceptions = $exceptions !== [];

    $words_str = '';
    foreach ($words as $word) {
        // skip empty segments only; a "0" segment must be kept
        if ($word === '') {
            continue;
        }

        if (
            !$use_exceptions
            ||
            !\in_array($word, $exceptions, true)
        ) {
            $words_str .= self::ucfirst($word, $encoding);
        } else {
            $words_str .= $word;
        }
    }

    return $words_str;
}
/**
 * Multi decode HTML entity + fix urlencoded-win1252-chars.
 *
 * EXAMPLE: UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'
 *
 * e.g:
 * 'test+test'                     => 'test test'
 * 'D&#252;sseldorf'               => 'Düsseldorf'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D&#xFC;sseldorf'               => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'DÃ¼sseldorf'                   => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * One decoding pass is: "UTF8::to_utf8()", then "UTF8::html_entity_decode()",
 * then the native "urldecode()". The final result is passed through
 * "UTF8::fix_simple_utf8()".
 *
 * @param string $str          The input string.
 * @param bool   $multi_decode Decode as often as possible.
 *
 * @psalm-pure
 *
 * @return string
 */
public static function urldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    $str = self::urldecode_unicode_helper($str);

    // Always decode once; with $multi_decode keep going until a pass no
    // longer changes the string.
    do {
        $before_pass = $str;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $str = \urldecode(
            self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            )
        );
    } while ($multi_decode && $before_pass !== $str);

    return self::fix_simple_utf8($str);
}
/**
 * Return a array with "urlencoded"-win1252 -> UTF-8
 *
 * The returned map has one entry for every percent-encoded byte from
 * "%20" to "%FF" (upper-case hex digits); the value is the replacement
 * string for that byte.
 *
 * @psalm-pure
 *
 * @return string[]
 *                  percent-encoded byte => replacement string
 *
 * @deprecated please use the "UTF8::urldecode()" function to decode a string
 */
public static function urldecode_fix_win1252_chars(): array
{
return [
// 0x20 - 0x7F: ASCII range
'%20' => ' ',
'%21' => '!',
'%22' => '"',
'%23' => '#',
'%24' => '$',
'%25' => '%',
'%26' => '&',
'%27' => "'",
'%28' => '(',
'%29' => ')',
'%2A' => '*',
'%2B' => '+',
'%2C' => ',',
'%2D' => '-',
'%2E' => '.',
'%2F' => '/',
'%30' => '0',
'%31' => '1',
'%32' => '2',
'%33' => '3',
'%34' => '4',
'%35' => '5',
'%36' => '6',
'%37' => '7',
'%38' => '8',
'%39' => '9',
'%3A' => ':',
'%3B' => ';',
'%3C' => '<',
'%3D' => '=',
'%3E' => '>',
'%3F' => '?',
'%40' => '@',
'%41' => 'A',
'%42' => 'B',
'%43' => 'C',
'%44' => 'D',
'%45' => 'E',
'%46' => 'F',
'%47' => 'G',
'%48' => 'H',
'%49' => 'I',
'%4A' => 'J',
'%4B' => 'K',
'%4C' => 'L',
'%4D' => 'M',
'%4E' => 'N',
'%4F' => 'O',
'%50' => 'P',
'%51' => 'Q',
'%52' => 'R',
'%53' => 'S',
'%54' => 'T',
'%55' => 'U',
'%56' => 'V',
'%57' => 'W',
'%58' => 'X',
'%59' => 'Y',
'%5A' => 'Z',
'%5B' => '[',
'%5C' => '\\',
'%5D' => ']',
'%5E' => '^',
'%5F' => '_',
'%60' => '`',
'%61' => 'a',
'%62' => 'b',
'%63' => 'c',
'%64' => 'd',
'%65' => 'e',
'%66' => 'f',
'%67' => 'g',
'%68' => 'h',
'%69' => 'i',
'%6A' => 'j',
'%6B' => 'k',
'%6C' => 'l',
'%6D' => 'm',
'%6E' => 'n',
'%6F' => 'o',
'%70' => 'p',
'%71' => 'q',
'%72' => 'r',
'%73' => 's',
'%74' => 't',
'%75' => 'u',
'%76' => 'v',
'%77' => 'w',
'%78' => 'x',
'%79' => 'y',
'%7A' => 'z',
'%7B' => '{',
'%7C' => '|',
'%7D' => '}',
'%7E' => '~',
'%7F' => '',
// 0x80 - 0x9F: the Windows-1252 specific range
// NOTE(review): in Windows-1252 the byte 0x80 is the Euro sign, but this
// table maps "%80" to a backtick - confirm whether that is intended.
'%80' => '`',
'%81' => '',
'%82' => '‚',
'%83' => 'ƒ',
'%84' => '„',
'%85' => '…',
'%86' => '†',
'%87' => '‡',
'%88' => 'ˆ',
'%89' => '‰',
'%8A' => 'Š',
'%8B' => '‹',
'%8C' => 'Œ',
'%8D' => '',
'%8E' => 'Ž',
'%8F' => '',
'%90' => '',
'%91' => '‘',
'%92' => '’',
'%93' => '“',
'%94' => '”',
'%95' => '•',
'%96' => '–',
'%97' => '—',
'%98' => '˜',
'%99' => '™',
'%9A' => 'š',
'%9B' => '›',
'%9C' => 'œ',
'%9D' => '',
'%9E' => 'ž',
'%9F' => 'Ÿ',
// 0xA0 - 0xFF: upper half shared with ISO-8859-1
'%A0' => '',
'%A1' => '¡',
'%A2' => '¢',
'%A3' => '£',
'%A4' => '¤',
'%A5' => '¥',
'%A6' => '¦',
'%A7' => '§',
'%A8' => '¨',
'%A9' => '©',
'%AA' => 'ª',
'%AB' => '«',
'%AC' => '¬',
'%AD' => '',
'%AE' => '®',
'%AF' => '¯',
'%B0' => '°',
'%B1' => '±',
'%B2' => '²',
'%B3' => '³',
'%B4' => '´',
'%B5' => 'µ',
'%B6' => '¶',
'%B7' => '·',
'%B8' => '¸',
'%B9' => '¹',
'%BA' => 'º',
'%BB' => '»',
'%BC' => '¼',
'%BD' => '½',
'%BE' => '¾',
'%BF' => '¿',
'%C0' => 'À',
'%C1' => 'Á',
'%C2' => 'Â',
'%C3' => 'Ã',
'%C4' => 'Ä',
'%C5' => 'Å',
'%C6' => 'Æ',
'%C7' => 'Ç',
'%C8' => 'È',
'%C9' => 'É',
'%CA' => 'Ê',
'%CB' => 'Ë',
'%CC' => 'Ì',
'%CD' => 'Í',
'%CE' => 'Î',
'%CF' => 'Ï',
'%D0' => 'Ð',
'%D1' => 'Ñ',
'%D2' => 'Ò',
'%D3' => 'Ó',
'%D4' => 'Ô',
'%D5' => 'Õ',
'%D6' => 'Ö',
'%D7' => '×',
'%D8' => 'Ø',
'%D9' => 'Ù',
'%DA' => 'Ú',
'%DB' => 'Û',
'%DC' => 'Ü',
'%DD' => 'Ý',
'%DE' => 'Þ',
'%DF' => 'ß',
'%E0' => 'à',
'%E1' => 'á',
'%E2' => 'â',
'%E3' => 'ã',
'%E4' => 'ä',
'%E5' => 'å',
'%E6' => 'æ',
'%E7' => 'ç',
'%E8' => 'è',
'%E9' => 'é',
'%EA' => 'ê',
'%EB' => 'ë',
'%EC' => 'ì',
'%ED' => 'í',
'%EE' => 'î',
'%EF' => 'ï',
'%F0' => 'ð',
'%F1' => 'ñ',
'%F2' => 'ò',
'%F3' => 'ó',
'%F4' => 'ô',
'%F5' => 'õ',
'%F6' => 'ö',
'%F7' => '÷',
'%F8' => 'ø',
'%F9' => 'ù',
'%FA' => 'ú',
'%FB' => 'û',
'%FC' => 'ü',
'%FD' => 'ý',
'%FE' => 'þ',
'%FF' => 'ÿ',
];
}
/**
 * Decodes a UTF-8 string to ISO-8859-1.
 *
 * The string is rewritten in place, byte by byte: two-byte sequences are
 * combined into one byte when the resulting value is below 256, three- and
 * four-byte sequences (and two-byte values >= 256) become "?", all other
 * bytes are copied unchanged.
 *
 * EXAMPLE: UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'
 *
 * @param string $str             The input string.
 * @param bool   $keep_utf8_chars If true, the original string is returned whenever the decoded
 *                                result is not shorter (measured with "UTF8::strlen()") than
 *                                the input.
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection SuspiciousBinaryOperationInspection
 */
public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
{
if ($str === '') {
return '';
}
// save for later comparision
$str_backup = $str;
$len = \strlen($str);
// lazily load the lookup tables (presumably byte <-> ordinal maps; their content is not visible here)
if (self::$ORD === null) {
self::$ORD = self::getData('ord');
}
if (self::$CHR === null) {
self::$CHR = self::getData('chr');
}
// replacement for everything that does not fit into one byte
$no_char_found = '?';
// $i is the read position, $j the write position inside the same string
/** @noinspection ForeachInvariantsInspection */
for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
// dispatch on the high nibble of the current byte
switch ($str[$i] & "\xF0") {
case "\xC0":
case "\xD0":
// lead byte of a two-byte sequence: 5 payload bits + 6 bits of the next byte
// NOTE(review): "$str[++$i]" is read without a bounds check - confirm the behaviour
// for input that ends right after a lead byte.
$c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
$str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;
break;
/** @noinspection PhpMissingBreakStatementInspection */
case "\xF0":
// four-byte sequence: skip one extra byte, then continue like a three-byte sequence
++$i;
// no break
case "\xE0":
$str[$j] = $no_char_found;
$i += 2;
break;
default:
$str[$j] = $str[$i];
}
}
// cut off the leftover bytes behind the write position
/** @var false|string $return - needed for PhpStan (stubs error) */
$return = \substr($str, 0, $j);
if ($return === false) {
$return = '';
}
if (
$keep_utf8_chars
&&
(int) self::strlen($return) >= (int) self::strlen($str_backup)
) {
return $str_backup;
}
return $return;
}
/**
 * Encodes an ISO-8859-1 string to UTF-8.
 *
 * EXAMPLE: UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'
 *
 * Every byte 0x80-0xFF is replaced by the two-byte UTF-8 sequence of the
 * code point with the same number; bytes below 0x80 are kept. This is the
 * mapping of the native "utf8_encode()", which is not called any more
 * because it is deprecated as of PHP 8.2.
 *
 * @param string $str The input string.
 *
 * @psalm-pure
 *
 * @return string
 *                The UTF-8 encoded string; an empty string if the replacement fails.
 */
public static function utf8_encode(string $str): string
{
    if ($str === '') {
        return '';
    }

    // no "u" modifier: the pattern has to match single bytes
    $encoded = \preg_replace_callback(
        '/[\x80-\xFF]/',
        /**
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $match): string {
            $byte = \ord($match[0]);

            // 110000xx 10xxxxxx
            return \chr(0xC0 | ($byte >> 6)) . \chr(0x80 | ($byte & 0x3F));
        },
        $str
    );

    if ($encoded === null) {
        return '';
    }

    return $encoded;
}
/**
 * Deprecated alias that forwards to "UTF8::fix_simple_utf8()".
 *
 * @param string $str The input string.
 *
 * @psalm-pure
 *
 * @return string
 *
 * @deprecated please use "UTF8::fix_simple_utf8()"
 */
public static function utf8_fix_win1252_chars(string $str): string
{
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
}
/**
 * Returns an array with all utf8 whitespace characters.
 *
 * The table itself is the static class property "$WHITESPACE_TABLE".
 *
 * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
 *
 * @psalm-pure
 *
 * @return string[]
 *                  An array with all known whitespace characters as values and the type of whitespace as keys
 *                  as defined in above URL
 */
public static function whitespace_table(): array
{
    $table = self::$WHITESPACE_TABLE;

    return $table;
}
/**
 * Limit the number of words in a string.
 *
 * EXAMPLE: UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'
 *
 * A "word" is a run of non-whitespace characters. When the string contains
 * more than $limit words, it is cut after the last allowed word, trailing
 * whitespace is removed and $str_add_on is appended.
 *
 * @param string $str        The input string.
 * @param int    $limit      The limit of words as integer.
 * @param string $str_add_on Appended to the result when the string had to be shortened.
 *
 * @psalm-pure
 *
 * @return string
 *                An empty string for an empty input or a limit below 1.
 */
public static function words_limit(
    string $str,
    int $limit = 100,
    string $str_add_on = '…'
): string {
    if ($limit < 1 || $str === '') {
        return '';
    }

    // leading whitespace, followed by 1..$limit words incl. their trailing whitespace
    $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $matches);

    $leading_words = $matches[0] ?? null;

    // no match at all, or the match already covers the whole string
    if (
        $leading_words === null
        ||
        \mb_strlen($str) === (int) \mb_strlen($leading_words)
    ) {
        return $str;
    }

    return \rtrim($leading_words) . $str_add_on;
}
/**
 * Wraps a string to a given number of characters
 *
 * EXAMPLE: UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'
 *
 * How it works: the input is turned into a list of characters plus a
 * parallel single-byte placeholder string (" " for a space, "?" for any
 * other character, "#" for an existing break). The native "wordwrap()"
 * runs on the placeholder string, and the positions of "#" in its result
 * tell where breaks belong in the list of real characters.
 *
 * @see http://php.net/manual/en/function.wordwrap.php
 *
 * @param string $str   The input string.
 * @param int    $width [optional] The column width.
 * @param string $break [optional] The line is broken using the optional break parameter.
 * @param bool   $cut   [optional]
 *                      If the cut is set to true, the string is
 *                      always wrapped at or before the specified width. So if you have
 *                      a word that is larger than the given width, it is broken apart.
 *
 * @psalm-pure
 *
 * @return string
 *                The given string wrapped at the specified column; an empty string
 *                if $str or $break is empty.
 */
public static function wordwrap(
string $str,
int $width = 75,
string $break = "\n",
bool $cut = false
): string {
if ($str === '' || $break === '') {
return '';
}
// existing breaks split the input into segments
$str_split = \explode($break, $str);
if ($str_split === false) {
return '';
}
// $charsArray: the real characters (a whole $break is stored as one entry)
// $word_split: one placeholder byte per entry of $charsArray
/** @var string[] $charsArray */
$charsArray = [];
$word_split = '';
foreach ($str_split as $i => $i_value) {
if ($i) {
// every segment but the first one was preceded by a break
$charsArray[] = $break;
$word_split .= '#';
}
foreach (self::str_split($i_value) as $c) {
$charsArray[] = $c;
if ($c === ' ') {
$word_split .= ' ';
} else {
$word_split .= '?';
}
}
}
$str_return = '';
// $j: position in $charsArray, $b: position of the current "#", $i: position in $word_split
$j = 0;
$b = -1;
$i = -1;
// let the native function decide where the breaks go
$word_split = \wordwrap($word_split, $width, '#', $cut);
$max = \mb_strlen($word_split);
while (($b = \mb_strpos($word_split, '#', $b + 1)) !== false) {
// copy all characters in front of the break
for (++$i; $i < $b; ++$i) {
if (isset($charsArray[$j])) {
$str_return .= $charsArray[$j];
unset($charsArray[$j]);
}
++$j;
// prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
if ($i > $max) {
break 2;
}
}
// the break replaces an existing break or a space: drop that character
if (
$break === $charsArray[$j]
||
$charsArray[$j] === ' '
) {
unset($charsArray[$j++]);
}
$str_return .= $break;
// prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
if ($b > $max) {
break;
}
}
// append the characters behind the last break
return $str_return . \implode('', $charsArray);
}
/**
 * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
 *    ... so that we wrap the per line.
 *
 * Each part is wrapped on its own via "UTF8::wordwrap()"; the wrapped parts
 * are joined again with the delimiter ("\n" if no delimiter was given).
 *
 * @param string      $str             The input string.
 * @param int         $width           [optional] The column width.
 * @param string      $break           [optional] The line is broken using the optional break parameter.
 * @param bool        $cut             [optional]
 *                                     If the cut is set to true, the string is
 *                                     always wrapped at or before the specified width. So if you have
 *                                     a word that is larger than the given width, it is broken apart.
 * @param bool        $add_final_break [optional]
 *                                     If this flag is true, then the method will add a $break at the end
 *                                     of the result string.
 * @param string|null $delimiter       [optional]
 *                                     You can change the default behavior, where we split the string by newline.
 *
 * @psalm-pure
 *
 * @return string
 */
public static function wordwrap_per_line(
    string $str,
    int $width = 75,
    string $break = "\n",
    bool $cut = false,
    bool $add_final_break = true,
    string $delimiter = null
): string {
    // default: split on any kind of newline
    $parts = $delimiter === null
        ? \preg_split('/\\r\\n|\\r|\\n/', $str)
        : \explode($delimiter, $str);

    $wrapped_parts = [];
    if ($parts !== false) {
        foreach ($parts as $part) {
            $wrapped_parts[] = self::wordwrap($part, $width, $break, $cut);
        }
    }

    $glue = $delimiter ?? "\n";
    $suffix = $add_final_break ? $break : '';

    return \implode($glue, $wrapped_parts) . $suffix;
}
/**
 * Returns an array of Unicode White Space characters.
 *
 * The list itself is the static class property "$WHITESPACE".
 *
 * @psalm-pure
 *
 * @return string[]
 *                  An array with numeric code point as key and White Space Character as value.
 */
public static function ws(): array
{
    $whitespace = self::$WHITESPACE;

    return $whitespace;
}
/**
* Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
*
* EXAMPLE:
* UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
* //
* UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
*
* An empty string is reported as valid. When self::$SUPPORT['pcre_utf8'] is truthy the
* check is delegated to a single PCRE match with the /u modifier; otherwise the bytes are
* walked one by one with a hand-written decoder that rejects over-long forms, 5- and 6-octet
* sequences, surrogates and code points above 0x10FFFF.
*
* @see http://hsivonen.iki.fi/php-utf8/
*
* @param string $str The string to be checked.
* @param bool $strict Check also if the string is not UTF-16 or UTF-32.
*
* @psalm-pure
*
* @return bool
* true for an empty or valid UTF-8 string, false otherwise.
*
* @noinspection ReturnTypeCanBeDeclaredInspection
*/
private static function is_utf8_string(string $str, bool $strict = false)
{
// Nothing to validate: the empty string is treated as valid.
if ($str === '') {
return true;
}
// Strict mode: a string flagged as binary that is also detected as UTF-16 or UTF-32
// is rejected before any UTF-8 validation happens.
if ($strict) {
$is_binary = self::is_binary($str, true);
if ($is_binary && self::is_utf16($str, false) !== false) {
return false;
}
if ($is_binary && self::is_utf32($str, false) !== false) {
return false;
}
}
if (self::$SUPPORT['pcre_utf8']) {
// If even just the first character can be matched, when the /u
// modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
// invalid, nothing at all will match, even if the string contains
// some valid sequences
return \preg_match('/^./us', $str) === 1;
}
// Fallback: manual byte-by-byte decoder, used only when PCRE lacks UTF-8 support.
$mState = 0; // number of continuation octets still expected for the current sequence
// (zero means we are at the start of a new character)
$mUcs4 = 0; // code point accumulated so far for the current sequence
$mBytes = 1; // total number of octets announced by the lead octet of the current sequence
// Lazily load the byte => ordinal lookup table.
if (self::$ORD === null) {
self::$ORD = self::getData('ord');
}
$len = \strlen($str);
/** @noinspection ForeachInvariantsInspection */
for ($i = 0; $i < $len; ++$i) {
// Numeric value of the current byte, taken from the lookup table.
$in = self::$ORD[$str[$i]];
if ($mState === 0) {
// When mState is zero we expect either a US-ASCII character or a
// multi-octet sequence.
if ((0x80 & $in) === 0) {
// US-ASCII, pass straight through.
$mBytes = 1;
} elseif ((0xE0 & $in) === 0xC0) {
// First octet of 2 octet sequence.
$mUcs4 = $in;
$mUcs4 = ($mUcs4 & 0x1F) << 6;
$mState = 1;
$mBytes = 2;
} elseif ((0xF0 & $in) === 0xE0) {
// First octet of 3 octet sequence.
$mUcs4 = $in;
$mUcs4 = ($mUcs4 & 0x0F) << 12;
$mState = 2;
$mBytes = 3;
} elseif ((0xF8 & $in) === 0xF0) {
// First octet of 4 octet sequence.
$mUcs4 = $in;
$mUcs4 = ($mUcs4 & 0x07) << 18;
$mState = 3;
$mBytes = 4;
} elseif ((0xFC & $in) === 0xF8) {
/* First octet of 5 octet sequence.
*
* This is illegal because the encoded codepoint must be either
* (a) not the shortest form or
* (b) outside the Unicode range of 0-0x10FFFF.
* Rather than trying to resynchronize, we will carry on until the end
* of the sequence and let the later error handling code catch it.
*/
$mUcs4 = $in;
$mUcs4 = ($mUcs4 & 0x03) << 24;
$mState = 4;
$mBytes = 5;
} elseif ((0xFE & $in) === 0xFC) {
// First octet of 6 octet sequence, see comments for 5 octet sequence.
$mUcs4 = $in;
$mUcs4 = ($mUcs4 & 1) << 30;
$mState = 5;
$mBytes = 6;
} else {
// Current octet is neither in the US-ASCII range nor a legal first
// octet of a multi-octet sequence.
return false;
}
} elseif ((0xC0 & $in) === 0x80) {
// When mState is non-zero, we expect a continuation of the multi-octet
// sequence
// Legal continuation: merge its low 6 bits into the code point at the
// position given by the number of octets still outstanding.
$shift = ($mState - 1) * 6;
$tmp = $in;
$tmp = ($tmp & 0x0000003F) << $shift;
$mUcs4 |= $tmp;
// Once the counter drops to zero the multi-octet sequence is complete and
// mUcs4 contains the final Unicode code point.
if (--$mState === 0) {
// Check for illegal sequences and code points.
//
// From Unicode 3.1, non-shortest form is illegal
if (
($mBytes === 2 && $mUcs4 < 0x0080)
||
($mBytes === 3 && $mUcs4 < 0x0800)
||
($mBytes === 4 && $mUcs4 < 0x10000)
||
($mBytes > 4)
||
// From Unicode 3.2, surrogate characters are illegal.
(($mUcs4 & 0xFFFFF800) === 0xD800)
||
// Code points outside the Unicode range are illegal.
($mUcs4 > 0x10FFFF)
) {
return false;
}
// Reset the decoder state for the next character.
$mState = 0;
$mUcs4 = 0;
$mBytes = 1;
}
} else {
// ((0xC0 & (*in) != 0x80) && (mState != 0))
// Incomplete multi-octet sequence.
return false;
}
}
// A non-zero state here means the string ended in the middle of a sequence.
return $mState === 0;
}
/**
 * Applies the case-folding replacement tables to a string.
 *
 * The "common" table (self::$COMMON_CASE_FOLD) is always applied; the "full" table
 * (loaded once from the "caseFolding_full" data file) is applied on top when requested.
 *
 * @param string $str
 * @param bool   $use_lowercase      When true, "upper" entries are replaced by their "lower" counterparts;
 *                                   otherwise (default) "lower" entries are replaced by "upper" ones.
 * @param bool   $use_full_case_fold Convert not only common cases.
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function fixStrCaseHelper(
    string $str,
    bool $use_lowercase = false,
    bool $use_full_case_fold = false
) {
    $common_upper = self::$COMMON_CASE_FOLD['upper'];
    $common_lower = self::$COMMON_CASE_FOLD['lower'];

    // Common table: the direction depends on the requested target case.
    $str = $use_lowercase
        ? \str_replace($common_upper, $common_lower, $str)
        : \str_replace($common_lower, $common_upper, $str);

    if (!$use_full_case_fold) {
        return $str;
    }

    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array|null
     */
    static $FULL_CASE_FOLD = null;
    if ($FULL_CASE_FOLD === null) {
        // Loaded once per process and kept in the function-static cache.
        $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // Full table: index 0 is replaced by index 1 when lower-casing, the reverse otherwise.
    $from = $use_lowercase ? $FULL_CASE_FOLD[0] : $FULL_CASE_FOLD[1];
    $to = $use_lowercase ? $FULL_CASE_FOLD[1] : $FULL_CASE_FOLD[0];

    return \str_replace($from, $to, $str);
}
/**
 * Includes a PHP data file from the "data" directory next to this file and
 * returns whatever that file returns.
 *
 * @param string $file Base name of the data file, without the ".php" extension.
 *
 * @psalm-pure
 *
 * @return array
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function getData(string $file)
{
    $data_file_path = __DIR__ . '/data/' . $file . '.php';

    /** @noinspection PhpIncludeInspection */
    /** @noinspection UsingInclusionReturnValueInspection */
    /** @psalm-suppress UnresolvableInclude */
    return include $data_file_path;
}
/**
 * Builds the emoji lookup caches on first use.
 *
 * Loads the "emoji" data file if needed, orders its keys by descending byte length,
 * then fills the key, value and "reversible" placeholder caches.
 *
 * @psalm-pure
 *
 * @return true|null
 *                   true when the caches were built by this call, null when the key cache already existed
 */
private static function initEmojiData()
{
    // Already initialised: nothing to do.
    if (self::$EMOJI_KEYS_CACHE !== null) {
        return null;
    }

    if (self::$EMOJI === null) {
        self::$EMOJI = self::getData('emoji');
    }

    /**
     * @psalm-suppress ImpureFunctionCall - static sort function is used
     */
    \uksort(
        self::$EMOJI,
        static function (string $a, string $b): int {
            // Longer keys first.
            return \strlen($b) <=> \strlen($a);
        }
    );

    self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
    self::$EMOJI_VALUES_CACHE = self::$EMOJI;

    // One placeholder per key, derived from the key's CRC32 and its reversed digits.
    foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
        $checksum = (string) \crc32($emoji_key);
        self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
    }

    return true;
}
/**
 * Checks whether mbstring "overloaded" is active on the server.
 *
 * Reports true only when the MB_OVERLOAD_STRING constant exists and its bit is set
 * in the "mbstring.func_overload" INI value.
 *
 * @psalm-pure
 *
 * @return bool
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function mbstring_overloaded()
{
    // Without the constant there is no overload bit to test against.
    if (!\defined('MB_OVERLOAD_STRING')) {
        return false;
    }

    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $func_overload = (int) @\ini_get('mbstring.func_overload');

    /** @noinspection PhpComposerExtensionStubsInspection */
    return (bool) ($func_overload & \MB_OVERLOAD_STRING);
}
/**
 * Filters a list of strings and returns the survivors as a re-indexed list.
 *
 * @param array    $strings
 * @param bool     $remove_empty_values  Drop entries that are empty after trim().
 * @param int|null $remove_short_values  Drop entries whose mb_strlen() is less than or equal to this value;
 *                                       null disables the length filter.
 *
 * @psalm-pure
 *
 * @return array
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function reduce_string_array(
    array $strings,
    bool $remove_empty_values,
    int $remove_short_values = null
) {
    $kept = [];

    foreach ($strings as $candidate) {
        // The length filter is evaluated first, exactly like the blank filter it short-circuits.
        $is_too_short = $remove_short_values !== null
            && \mb_strlen($candidate) <= $remove_short_values;

        if ($is_too_short || ($remove_empty_values && \trim($candidate) === '')) {
            continue;
        }

        $kept[] = $candidate;
    }

    return $kept;
}
/**
 * Builds a regex fragment matching any of the characters of $s, plus an optional
 * extra character-class body.
 *
 * Characters are collected into one bracket expression; anything that does not fit
 * into it is emitted as an alternative inside a non-capturing group. Results are
 * memoised per ($s, $class) pair.
 *
 * @param string $s     Characters to match.
 * @param string $class Raw character-class content that is placed first inside the brackets.
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function rxClass(string $s, string $class = '')
{
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array
     */
    static $RX_CLASS_CACHE = [];

    $cache_key = $s . '_' . $class;
    if (isset($RX_CLASS_CACHE[$cache_key])) {
        return $RX_CLASS_CACHE[$cache_key];
    }

    $bracket_body = $class;
    $alternatives = [];

    foreach (self::str_split($s) as $char) {
        if ($char === '-') {
            // A hyphen goes to the very front of the bracket body.
            $bracket_body = '-' . $bracket_body;
        } elseif (!isset($char[2])) {
            // At most two bytes long: quoted for use inside a "/"-delimited pattern.
            $bracket_body .= \preg_quote($char, '/');
        } elseif (self::strlen($char) === 1) {
            // Three or more bytes but a single character: appended verbatim.
            $bracket_body .= $char;
        } else {
            // Anything longer becomes its own alternative.
            $alternatives[] = $char;
        }
    }

    if ($bracket_body) {
        $bracket_body = '[' . $bracket_body . ']';
    }

    if ($alternatives === []) {
        $pattern = $bracket_body;
    } else {
        $pattern = '(?:' . \implode('|', \array_merge([$bracket_body], $alternatives)) . ')';
    }

    $RX_CLASS_CACHE[$cache_key] = $pattern;

    return $pattern;
}
/**
 * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
 *
 * Splits $names on $delimiter and upper-cases the first character of every part, except:
 * - parts that are exactly one of the known name particles ("von", "de", ...),
 * - parts that start with a known prefix ("mc", "d'", ...),
 * - when the delimiter is "-", parts that start with one of the name particles.
 *
 * @param string $names
 * @param string $delimiter
 * @param string $encoding
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function str_capitalize_name_helper(
    string $names,
    string $delimiter,
    string $encoding = 'UTF-8'
) {
    $parts = \explode($delimiter, $names);
    if ($parts === false) {
        return '';
    }

    // Particles that stay lower-case when they form a whole part.
    $particles = [
        'ab',
        'af',
        'al',
        'and',
        'ap',
        'bint',
        'binte',
        'da',
        'de',
        'del',
        'den',
        'der',
        'di',
        'dit',
        'ibn',
        'la',
        'mac',
        'nic',
        'of',
        'ter',
        'the',
        'und',
        'van',
        'von',
        'y',
        'zu',
    ];

    // Leading fragments that keep a part untouched.
    $prefixes = [
        'al-',
        "d'",
        'ff',
        "l'",
        'mac',
        'mc',
        'nic',
    ];

    // Byte-wise "starts with any of these" check.
    $starts_with_any = static function (string $haystack, array $candidates): bool {
        foreach ($candidates as $candidate) {
            if (\strncmp($haystack, $candidate, \strlen($candidate)) === 0) {
                return true;
            }
        }

        return false;
    };

    foreach ($parts as $index => $part) {
        if (\in_array($part, $particles, true)) {
            continue;
        }

        // For hyphenated names a leading particle is enough to leave the part alone.
        if ($delimiter === '-' && $starts_with_any($part, $particles)) {
            continue;
        }

        if ($starts_with_any($part, $prefixes)) {
            continue;
        }

        $parts[$index] = self::ucfirst($part);
    }

    return \implode($delimiter, $parts);
}
/**
 * Generic case-sensitive transformation for collation matching.
 *
 * Decomposes the string to NFD and strips every run of non-spacing marks (\p{Mn}).
 *
 * @param string $str The input string
 *
 * @psalm-pure
 *
 * @return string|null
 *                     the stripped string, an empty string when normalization fails,
 *                     or null when the regex replacement fails
 */
private static function strtonatfold(string $str)
{
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    /** @phpstan-ignore-next-line - https://github.com/JetBrains/phpstorm-stubs/pull/949 */
    if ($decomposed === false) {
        return '';
    }

    /** @noinspection PhpUndefinedClassInspection */
    $without_marks = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    return $without_marks;
}
/**
 * Converts one single-byte character into its UTF-8 representation.
 *
 * The byte value is looked up in the Windows-1252 special-case table first; when no
 * entry exists, a two-byte sequence is built from the byte's high and low bits.
 *
 * @param int|string $input
 *                          Used as key into the byte => ordinal table, so presumably a
 *                          one-byte string - NOTE(review): confirm against the callers
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 * @noinspection SuspiciousBinaryOperationInspection
 */
private static function to_utf8_convert_helper($input)
{
    // init
    $buf = '';

    // Lazily load the lookup tables.
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }
    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }
    if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
        $buf .= self::$WIN1252_TO_UTF8[$ordC1];
    } else {
        // Lead byte: the bits above the low six, OR-ed with the 0xC0 marker.
        // An integer shift is used instead of "/ 64": the division yields a float,
        // and a fractional float array offset is truncated implicitly, which is
        // deprecated since PHP 8.1.
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
        // Continuation byte: low six bits of the input (byte-wise string AND),
        // OR-ed with the 0x80 marker.
        $cc2 = ((string) $input & "\x3F") | "\x80";
        $buf .= $cc1 . $cc2;
    }

    return $buf;
}
/**
 * Rewrites non-standard "%uXXXX" URL escapes as hexadecimal numeric character
 * references ("&#xXXXX;"), leaving everything else untouched.
 *
 * Only escapes with three or four hex digits are rewritten. The references are
 * not decoded here; presumably the caller decodes them afterwards -
 * NOTE(review): confirm against the caller.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 *                the rewritten string, or the input unchanged when it holds no "%u" escape
 *                or when the replacement fails
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function urldecode_unicode_helper(string $str)
{
    // Cheap pre-check: skip the regex entirely for strings without "%u".
    if (\strpos($str, '%u') === false) {
        return $str;
    }

    // The captured hex digits must be wrapped as "&#x...;": emitting only the digits
    // plus ";" would turn "%u00e4" into the meaningless text "00e4;".
    $rewritten = \preg_replace('/%u([0-9a-fA-F]{3,4})/', '&#x\\1;', $str);

    // preg_replace() returns null on a PCRE error; keep the input instead of wiping it.
    return $rewritten ?? $str;
}
}