| Server IP : 170.10.162.208 / Your IP : 216.73.216.181 Web Server : LiteSpeed System : Linux altar19.supremepanel19.com 4.18.0-553.69.1.lve.el8.x86_64 #1 SMP Wed Aug 13 19:53:59 UTC 2025 x86_64 User : deltahospital ( 1806) PHP Version : 7.4.33 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : OFF | Pkexec : OFF Directory : /tmp/ |
Upload File : |
package Unicode::UCD;
use strict;
use warnings;
no warnings 'surrogate'; # surrogates can be inputs to this
use charnames ();
our $VERSION = '0.69';
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(charinfo
charblock charscript
charblocks charscripts
charinrange
charprop
charprops_all
general_categories bidi_types
compexcl
casefold all_casefolds casespec
namedseq
num
prop_aliases
prop_value_aliases
prop_values
prop_invlist
prop_invmap
search_invlist
MAX_CP
);
use Carp;
sub IS_ASCII_PLATFORM { ord("A") == 65 }
=head1 NAME
Unicode::UCD - Unicode character database
=head1 SYNOPSIS
use Unicode::UCD 'charinfo';
my $charinfo = charinfo($codepoint);
use Unicode::UCD 'charprop';
my $value = charprop($codepoint, $property);
use Unicode::UCD 'charprops_all';
my $all_values_hash_ref = charprops_all($codepoint);
use Unicode::UCD 'casefold';
my $casefold = casefold($codepoint);
use Unicode::UCD 'all_casefolds';
my $all_casefolds_ref = all_casefolds();
use Unicode::UCD 'casespec';
my $casespec = casespec($codepoint);
use Unicode::UCD 'charblock';
my $charblock = charblock($codepoint);
use Unicode::UCD 'charscript';
my $charscript = charscript($codepoint);
use Unicode::UCD 'charblocks';
my $charblocks = charblocks();
use Unicode::UCD 'charscripts';
my $charscripts = charscripts();
use Unicode::UCD qw(charscript charinrange);
my $range = charscript($script);
print "looks like $script\n" if charinrange($range, $codepoint);
use Unicode::UCD qw(general_categories bidi_types);
my $categories = general_categories();
my $types = bidi_types();
use Unicode::UCD 'prop_aliases';
my @space_names = prop_aliases("space");
use Unicode::UCD 'prop_value_aliases';
my @gc_punct_names = prop_value_aliases("Gc", "Punct");
use Unicode::UCD 'prop_values';
my @all_EA_short_names = prop_values("East_Asian_Width");
use Unicode::UCD 'prop_invlist';
my @puncts = prop_invlist("gc=punctuation");
use Unicode::UCD 'prop_invmap';
my ($list_ref, $map_ref, $format, $missing)
= prop_invmap("General Category");
use Unicode::UCD 'search_invlist';
my $index = search_invlist(\@invlist, $code_point);
# The following function should be used only internally in
# implementations of the Unicode Normalization Algorithm, and there
# are better choices than it.
use Unicode::UCD 'compexcl';
my $compexcl = compexcl($codepoint);
use Unicode::UCD 'namedseq';
my $namedseq = namedseq($named_sequence_name);
my $unicode_version = Unicode::UCD::UnicodeVersion();
my $convert_to_numeric =
Unicode::UCD::num("\N{RUMI DIGIT ONE}\N{RUMI DIGIT TWO}");
=head1 DESCRIPTION
The Unicode::UCD module offers a series of functions that
provide a simple interface to the Unicode
Character Database.
=head2 code point argument
Some of the functions are called with a I<code point argument>, which is either
a decimal or a hexadecimal scalar designating a code point in the platform's
native character set (extended to Unicode), or a string containing C<U+>
followed by hexadecimals
designating a Unicode code point. A leading 0 will force a hexadecimal
interpretation, as will a hexadecimal digit that isn't a decimal digit.
Examples:
223 # Decimal 223 in native character set
0223 # Hexadecimal 223, native (= 547 decimal)
0xDF # Hexadecimal DF, native (= 223 decimal)
'0xDF' # String form of hexadecimal (= 223 decimal)
'U+DF' # Hexadecimal DF, in Unicode's character set
(= LATIN SMALL LETTER SHARP S)
Note that the largest code point in Unicode is U+10FFFF.
=cut
my $BLOCKSFH;
my $VERSIONFH;
my $CASEFOLDFH;
my $CASESPECFH;
my $NAMEDSEQFH;
my $v_unicode_version; # v-string.
sub openunicode {
my ($rfh, @path) = @_;
my $f;
unless (defined $$rfh) {
for my $d (@INC) {
use File::Spec;
$f = File::Spec->catfile($d, "unicore", @path);
last if open($$rfh, '<', $f);
undef $f;
}
croak __PACKAGE__, ": failed to find ",
File::Spec->catfile(@path), " in @INC"
unless defined $f;
}
return $f;
}
sub _dclone ($) { # Use Storable::dclone if available; otherwise emulate it.
use if defined &DynaLoader::boot_DynaLoader, Storable => qw(dclone);
return dclone(shift) if defined &dclone;
my $arg = shift;
my $type = ref $arg;
return $arg unless $type; # No deep cloning needed for scalars
if ($type eq 'ARRAY') {
my @return;
foreach my $element (@$arg) {
push @return, &_dclone($element);
}
return \@return;
}
elsif ($type eq 'HASH') {
my %return;
foreach my $key (keys %$arg) {
$return{$key} = &_dclone($arg->{$key});
}
return \%return;
}
else {
croak "_dclone can't handle " . $type;
}
}
=head2 B<charinfo()>
use Unicode::UCD 'charinfo';
my $charinfo = charinfo(0x41);
This returns information about the input L</code point argument>
as a reference to a hash of fields as defined by the Unicode
standard. If the L</code point argument> is not assigned in the standard
(i.e., has the general category C<Cn> meaning C<Unassigned>)
or is a non-character (meaning it is guaranteed to never be assigned in
the standard),
C<undef> is returned.
Fields that aren't applicable to the particular code point argument exist in the
returned hash, and are empty.
For results that are less "raw" than this function returns, or to get the values for
any property, not just the few covered by this function, use the
L</charprop()> function.
The keys in the hash with the meanings of their values are:
=over
=item B<code>
the input native L</code point argument> expressed in hexadecimal, with
leading zeros
added if necessary to make it contain at least four hexdigits
=item B<name>
name of I<code>, all IN UPPER CASE.
Some control-type code points do not have names.
This field will be empty for C<Surrogate> and C<Private Use> code points,
and for the others without a name,
it will contain a description enclosed in angle brackets, like
C<E<lt>controlE<gt>>.
=item B<category>
The short name of the general category of I<code>.
This will match one of the keys in the hash returned by L</general_categories()>.
The L</prop_value_aliases()> function can be used to get all the synonyms
of the category name.
=item B<combining>
the combining class number for I<code> used in the Canonical Ordering Algorithm.
For Unicode 5.1, this is described in Section 3.11 C<Canonical Ordering Behavior>
available at
L<http://www.unicode.org/versions/Unicode5.1.0/>
The L</prop_value_aliases()> function can be used to get all the synonyms
of the combining class number.
=item B<bidi>
bidirectional type of I<code>.
This will match one of the keys in the hash returned by L</bidi_types()>.
The L</prop_value_aliases()> function can be used to get all the synonyms
of the bidi type name.
=item B<decomposition>
is empty if I<code> has no decomposition; or is one or more codes
(separated by spaces) that, taken in order, represent a decomposition for
I<code>. Each has at least four hexdigits.
The codes may be preceded by a word enclosed in angle brackets, then a space,
like C<E<lt>compatE<gt> >, giving the type of decomposition
This decomposition may be an intermediate one whose components are also
decomposable. Use L<Unicode::Normalize> to get the final decomposition in one
step.
=item B<decimal>
if I<code> represents a decimal digit this is its integer numeric value
=item B<digit>
if I<code> represents some other digit-like number, this is its integer
numeric value
=item B<numeric>
if I<code> represents a whole or rational number, this is its numeric value.
Rational values are expressed as a string like C<1/