当前位置：源码 > Python源码 >

Python源码示例：unicodedata.name()

示例1

def main(argv):
  if len(argv) < 2:
    sys.exit('Must specify one or more font files.')

  cps = set()
  for filename in argv[1:]:
    if not os.path.isfile(filename):
      sys.exit('%s is not a file' % filename)
    cps |= fonts.CodepointsInFont(filename)

  for cp in sorted(cps):
    show_char = ''
    if FLAGS.show_char:
      show_char = (' ' + unichr(cp).strip() + ' ' +
                   unicodedata.name(unichr(cp), ''))
    show_subset = ''
    if FLAGS.show_subsets:
      show_subset = ' subset:%s' % ','.join(fonts.SubsetsForCodepoint(cp))

    print(u'0x%04X%s%s' % (cp, show_char, show_subset))

示例2

def main(global_delay, local_delay, concurrency):
    global global_sleep, local_sleep, semaphore, index
    global_sleep = global_delay
    local_sleep = local_delay
    semaphore = asyncio.Semaphore(concurrency)
    print('Global delay =', global_delay)
    print('Local delay =', local_delay)
    print('Max. concurrency =', concurrency)
    print('Building inverted index...')
    index = build_index()

    app = web.Application()
    app.router.add_get('/', usage)
    app.router.add_get('/index/{word}', index_for)
    app.router.add_get('/name/{char}', char_name)

    print('Listening on port', PORT)
    web.run_app(app, port=PORT)

示例3

def test_longstrings(self):
        # test long strings to check for memory overflow problems
        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
                   "backslashreplace"]
        # register the handlers under different names,
        # to prevent the codec from recognizing the name
        for err in errors:
            codecs.register_error("test." + err, codecs.lookup_error(err))
        l = 1000
        errors += [ "test." + err for err in errors ]
        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
                        "utf-8", "utf-7", "utf-16", "utf-32"):
                for err in errors:
                    try:
                        uni.encode(enc, err)
                    except UnicodeError:
                        pass

示例4

def test_hangul_syllables(self):
        self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
        self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
        self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
        self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
        self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
        self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
        self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
        self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
        self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
        self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
        self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
        self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
        self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")

        import unicodedata
        self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")

示例5

def test_strict_eror_handling(self):
        # bogus character name
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{blah}", 'unicode-escape', 'strict'
        )
        # long bogus character name
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
        )
        # missing closing brace
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{SPACE", 'unicode-escape', 'strict'
        )
        # missing opening brace
        self.assertRaises(
            UnicodeError,
            unicode, "\\NSPACE", 'unicode-escape', 'strict'
        )

示例6

def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        if is_cli:
            self.assertEqual(unicodedata.name(u'\u4e2d'), '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>')
        else:
            self.assertEqual(unicodedata.name(u'\u4e2d'), 'CJK UNIFIED IDEOGRAPH-4E2D')

        self.assertRaises(ValueError, unicodedata.decimal, u'\u4e2d')
        self.assertEqual(unicodedata.decimal(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.digit, u'\u4e2d')
        self.assertEqual(unicodedata.digit(u'\u4e2d', 0), 0)
        self.assertRaises(ValueError, unicodedata.numeric, u'\u4e2d')
        self.assertEqual(unicodedata.numeric(u'\u4e2d', 0), 0)
        self.assertEqual(unicodedata.category(u'\u4e2d'), 'Lo')
        self.assertEqual(unicodedata.bidirectional(u'\u4e2d'), 'L')
        self.assertEqual(unicodedata.combining(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.east_asian_width(u'\u4e2d'), 'W')
        self.assertEqual(unicodedata.mirrored(u'\u4e2d'), 0)
        self.assertEqual(unicodedata.decomposition(u'\u4e2d'), '')

示例7

def test_longstrings(self):
        # test long strings to check for memory overflow problems
        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
                   "backslashreplace"]
        # register the handlers under different names,
        # to prevent the codec from recognizing the name
        for err in errors:
            codecs.register_error("test." + err, codecs.lookup_error(err))
        l = 1000
        errors += [ "test." + err for err in errors ]
        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
                        "utf-8", "utf-7", "utf-16", "utf-32"):
                for err in errors:
                    try:
                        uni.encode(enc, err)
                    except UnicodeError:
                        pass

示例8

def test_hangul_syllables(self):
        self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
        self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
        self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
        self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
        self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
        self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
        self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
        self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
        self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
        self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
        self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
        self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
        self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")

        import unicodedata
        self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")

示例9

def test_strict_eror_handling(self):
        # bogus character name
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{blah}", 'unicode-escape', 'strict'
        )
        # long bogus character name
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
        )
        # missing closing brace
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{SPACE", 'unicode-escape', 'strict'
        )
        # missing opening brace
        self.assertRaises(
            UnicodeError,
            unicode, "\\NSPACE", 'unicode-escape', 'strict'
        )

示例10

def get_unicode_index(symbol):
    """get_unicode_index(symbol) -> integer

Return the integer index (from the Unicode table) of symbol.  *symbol*
can be a single unicode character, a TeX command (i.e. r'\pi'), or a
Type1 symbol name (i.e. 'phi').
"""
    # From UTF #25: U+2212 minus sign is the preferred
    # representation of the unary and binary minus sign rather than
    # the ASCII-derived U+002D hyphen-minus, because minus sign is
    # unambiguous and because it is rendered with a more desirable
    # length, usually longer than a hyphen.
    if symbol == '-':
        return 0x2212
    try:# This will succeed if symbol is a single unicode char
        return ord(symbol)
    except TypeError:
        pass
    try:# Is symbol a TeX symbol (i.e. \alpha)
        return tex2uni[symbol.strip("\\")]
    except KeyError:
        message = """'%(symbol)s' is not a valid Unicode character or
TeX/Type1 symbol"""%locals()
        raise ValueError(message)

示例11

def render_glyph(self, ox, oy, facename, font_class, sym, fontsize, dpi):
        """
        Draw a glyph at

          - *ox*, *oy*: position

          - *facename*: One of the TeX face names

          - *font_class*:

          - *sym*: TeX symbol name or single character

          - *fontsize*: fontsize in points

          - *dpi*: The dpi to draw at.
        """
        info = self._get_info(facename, font_class, sym, fontsize, dpi)
        realpath, stat_key = get_realpath_and_stat(info.font.fname)
        used_characters = self.used_characters.setdefault(
            stat_key, (realpath, set()))
        used_characters[1].add(info.num)
        self.mathtext_backend.render_glyph(ox, oy, info)

示例12

def test_longstrings(self):
        # test long strings to check for memory overflow problems
        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
                   "backslashreplace"]
        # register the handlers under different names,
        # to prevent the codec from recognizing the name
        for err in errors:
            codecs.register_error("test." + err, codecs.lookup_error(err))
        l = 1000
        errors += [ "test." + err for err in errors ]
        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
                        "utf-8", "utf-7", "utf-16", "utf-32"):
                for err in errors:
                    try:
                        uni.encode(enc, err)
                    except UnicodeError:
                        pass

示例13

def test_hangul_syllables(self):
        self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
        self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
        self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
        self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
        self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
        self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
        self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
        self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
        self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
        self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
        self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
        self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
        self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")

        import unicodedata
        self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")

示例14

def test_strict_eror_handling(self):
        # bogus character name
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{blah}", 'unicode-escape', 'strict'
        )
        # long bogus character name
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
        )
        # missing closing brace
        self.assertRaises(
            UnicodeError,
            unicode, "\\N{SPACE", 'unicode-escape', 'strict'
        )
        # missing opening brace
        self.assertRaises(
            UnicodeError,
            unicode, "\\NSPACE", 'unicode-escape', 'strict'
        )

示例15

def unicode(self, irc, msg, args, query):
        """[character]
        Look up unicode character details
        """
        url = "http://unicodelookup.com/lookup?"
        url = url + urlencode({"q": query, "o": 0})
        data = web.getUrl(url)
        try:
            data = json.loads(data)
            responses = []
            for result in data["results"]:
                ucode = result[2].replace("0x", "U+")
                name = unicodedata.name("{0}".format(query))
                responses.append(
                    "%s (%s): %s [HTML: %s / Decimal: %s / Hex: %s]"
                    % (ucode, name, result[4], result[3], result[1], result[2])
                )
            response = "; ".join(responses)
            irc.reply(response)
        except ValueError:
            irc.reply("No unicode characters matching /" + query + "/ found.")

示例16

def get_unicode_index(symbol):
    """get_unicode_index(symbol) -> integer

Return the integer index (from the Unicode table) of symbol.  *symbol*
can be a single unicode character, a TeX command (i.e. r'\pi'), or a
Type1 symbol name (i.e. 'phi').
"""
    # From UTF #25: U+2212 minus sign is the preferred
    # representation of the unary and binary minus sign rather than
    # the ASCII-derived U+002D hyphen-minus, because minus sign is
    # unambiguous and because it is rendered with a more desirable
    # length, usually longer than a hyphen.
    if symbol == '-':
        return 0x2212
    try:# This will succeed if symbol is a single unicode char
        return ord(symbol)
    except TypeError:
        pass
    try:# Is symbol a TeX symbol (i.e. \alpha)
        return tex2uni[symbol.strip("\\")]
    except KeyError:
        message = """'%(symbol)s' is not a valid Unicode character or
TeX/Type1 symbol"""%locals()
        raise ValueError(message)

示例17

def render_glyph(self, ox, oy, facename, font_class, sym, fontsize, dpi):
        """
        Draw a glyph at

          - *ox*, *oy*: position

          - *facename*: One of the TeX face names

          - *font_class*:

          - *sym*: TeX symbol name or single character

          - *fontsize*: fontsize in points

          - *dpi*: The dpi to draw at.
        """
        info = self._get_info(facename, font_class, sym, fontsize, dpi)
        realpath, stat_key = get_realpath_and_stat(info.font.fname)
        used_characters = self.used_characters.setdefault(
            stat_key, (realpath, set()))
        used_characters[1].add(info.num)
        self.mathtext_backend.render_glyph(ox, oy, info)

示例18

def remove_diacritics(self):
        """
        :return: str: the input string stripped of its diacritics

        Examples:
            >>> Word('ġelǣd').remove_diacritics()
            'gelæd'

        """

        w = ''
        for c in unicodedata.normalize('NFKD', self.word):
            if 'LATIN' == unicodedata.name(c)[:5]:
                w += c

        return w

示例19

def __init__(self, unicodeHexValue, block):
        """ Set up a unicode character.
        
        Arguments:
        unicodeHexValue -- an integer that should correspond to a 
                           Unicode code point.
        block -- the CharacterBlock this character belongs to.
        
        Raises:
        ValueError -- if unicodeHexValue is not a valid code point.
        
        """
        if unicodeHexValue < 0 or unicodeHexValue > 0x10FFFF:
            raise ValueError("numeric value outside Unicode range")
        self.unicodeHexValue = unicodeHexValue
        """ Use name check to filter out unused characters.
              unicodedata.name() raises ValueError for these
        """
        self.unichr = py23char(self.unicodeHexValue)
        self.name = unicodedata.name(self.unichr)
        self.equivalents = {}
        self._block = block

示例20

def _equivalent(self, char, prev, next, implicitA):
        """ Transliterate a Devanagari character to Latin.
        
        Add implicit As unless overridden by VIRAMA.
        
        """
        result = []
        if char.unichr != DevanagariCharacter._VIRAMA:
            result.append(char.equivalents[self.name])
        """ Append implicit A to consonants if the next character isn't a vowel. """
        if implicitA and char.isConsonant \
        and ((next is not None \
        and next.unichr != DevanagariCharacter._VIRAMA \
        and not next.isVowel) \
        or next is None):
            result.append(characterBlocks['DEVANAGARI']\
                   [DevanagariCharacter._LETTER_A].equivalents[self.name])
        return result

示例21

def __init__(self, unicodeHexValue, block):
        """ Set up a unicode character.
        
        Arguments:
        unicodeHexValue -- an integer that should correspond to a 
                           Unicode code point.
        block -- the CharacterBlock this character belongs to.
        
        Raises:
        ValueError -- if unicodeHexValue is not a valid code point.
        
        """
        if unicodeHexValue < 0 or unicodeHexValue > 0x10FFFF:
            raise (ValueError, "numeric value outside Unicode range")
        self.unicodeHexValue = unicodeHexValue
        """ Use name check to filter out unused characters.
              unicodedata.name() raises ValueError for these
        """
        self.chr = chr(self.unicodeHexValue)
        self.name = unicodedata.name(self.chr)
        self.equivalents = {}
        self._block = block

示例22

def _equivalent(self, char, prev, next, implicitA):
        """ Transliterate a Devanagari character to Latin.
        
        Add implicit As unless overridden by VIRAMA.
        
        """
        implicitA = False  # Force it!
        result = []
        if char.chr != DevanagariCharacter._VIRAMA:
            result.append(char.equivalents[self.name])
        """ Append implicit A to consonants if the next character isn't a vowel. """
        if implicitA and char.isConsonant \
        and ((next is not None \
        and next.chr != DevanagariCharacter._VIRAMA \
        and not next.isVowel) \
        or next is None):
            result.append(characterBlocks['DEVANAGARI']\
                   [DevanagariCharacter._LETTER_A].equivalents[self.name])
        return result

示例23

def _combining_class(cp):
    v = unicodedata.combining(unichr(cp))
    if v == 0:
        if not unicodedata.name(unichr(cp)):
            raise ValueError("Unknown character in unicodedata")
    return v

示例24

def _combining_class(cp):
    v = unicodedata.combining(unichr(cp))
    if v == 0:
        if not unicodedata.name(unichr(cp)):
            raise ValueError("Unknown character in unicodedata")
    return v

示例25

def _combining_class(cp):
    v = unicodedata.combining(unichr(cp))
    if v == 0:
        if not unicodedata.name(unichr(cp)):
            raise ValueError("Unknown character in unicodedata")
    return v

示例26

def find_unicodedata_name(data: str) -> list:
    """查询Unicode编码中的名字

    ♠ == BLACK SPADE SUIT
    \\N{BLACK SPADE SUIT} == ♠

    :param data: 字符串
    :return: 字符的Unicode名字列表
    """
    ls = []
    for i in data:
        ls.append(unicodedata.name(i))
    return ls

示例27

def charinfo(self, ctx: Context, *, characters: str) -> None:
        """Shows you information on up to 25 unicode characters."""
        match = re.match(r"<(a?):(\w+):(\d+)>", characters)
        if match:
            embed = Embed(
                title="Non-Character Detected",
                description=(
                    "Only unicode characters can be processed, but a custom Discord emoji "
                    "was found. Please remove it and try again."
                )
            )
            embed.colour = Colour.red()
            await ctx.send(embed=embed)
            return

        if len(characters) > 25:
            embed = Embed(title=f"Too many characters ({len(characters)}/25)")
            embed.colour = Colour.red()
            await ctx.send(embed=embed)
            return

        def get_info(char: str) -> Tuple[str, str]:
            digit = f"{ord(char):x}"
            if len(digit) <= 4:
                u_code = f"\\u{digit:>04}"
            else:
                u_code = f"\\U{digit:>08}"
            url = f"https://www.compart.com/en/unicode/U+{digit:>04}"
            name = f"[{unicodedata.name(char, '')}]({url})"
            info = f"`{u_code.ljust(10)}`: {name} - {utils.escape_markdown(char)}"
            return info, u_code

        charlist, rawlist = zip(*(get_info(c) for c in characters))

        embed = Embed(description="\n".join(charlist))
        embed.set_author(name="Character Info")

        if len(characters) > 1:
            embed.add_field(name='Raw', value=f"`{''.join(rawlist)}`", inline=False)

        await ctx.send(embed=embed)

示例28

def send_pep_zero(self, ctx: Context) -> None:
        """Send information about PEP 0."""
        pep_embed = Embed(
            title="**PEP 0 - Index of Python Enhancement Proposals (PEPs)**",
            description="[Link](https://www.python.org/dev/peps/)"
        )
        pep_embed.set_thumbnail(url=ICON_URL)
        pep_embed.add_field(name="Status", value="Active")
        pep_embed.add_field(name="Created", value="13-Jul-2000")
        pep_embed.add_field(name="Type", value="Informational")

        await ctx.send(embed=pep_embed)

示例29

def _ReformatLine(line):
  if line.startswith('0x'):
    codepoint = int(line[2:6], 16)
    out = unichr(codepoint) + ' ' + unicodedata.name(unichr(codepoint), '')
    return '0x%04X  %s' % (codepoint, out)
  else:
    return line

示例30

def validate_unicode_normalization(text):
    """
    Tests that letters composed of multiple Unicode characters (such as a base
    letter plus combining diacritics) conform to NFC normalization (canonical
    decomposition followed by canonical composition).
    """
    normalized_text = unicodedata.normalize('NFC', text)
    if text != normalized_text:
        # Find the first unmatched character and include it in the report.
        firsti = -1
        firstj = -1
        inpfirst = ''
        nfcfirst = ''
        tcols = text.split("\t")
        ncols = normalized_text.split("\t")
        for i in range(len(tcols)):
            for j in range(len(tcols[i])):
                if tcols[i][j] != ncols[i][j]:
                    firsti = i
                    firstj = j
                    inpfirst = unicodedata.name(tcols[i][j])
                    nfcfirst = unicodedata.name(ncols[i][j])
                    break
            if firsti >= 0:
                break
        testlevel = 1
        testclass = 'Unicode'
        testid = 'unicode-normalization'
        testmessage = "Unicode not normalized: %s.character[%d] is %s, should be %s." % (COLNAMES[firsti], firstj, inpfirst, nfcfirst)
        warn(testmessage, testclass, testlevel=testlevel, testid=testid)

Python源码示例：unicodedata.name()

微信关注