Python源码示例:unicodedata.east_asian_width()

示例1
def strdisplaywidth(self, s):
        """Return the number of terminal columns needed to display ``s``.

        The string is NFC-normalized first so that composable sequences are
        measured as a single code point.  Each remaining code point then
        counts as 2 columns when its East Asian Width property is Wide ('W')
        or Fullwidth ('F'), and as 1 column otherwise.

        :param s: the text to measure.
        :returns: the display width of ``s`` as an ``int``.
        """
        def get_char_display_width(unicode_str):
            # Wide and Fullwidth characters both occupy two columns
            # (UAX #11).  Everything else -- Halfwidth, Narrow, Neutral and
            # Ambiguous -- is treated as a single column; Ambiguous is
            # deliberately kept narrow, as in the previous implementation.
            if unicodedata.east_asian_width(unicode_str) in ("W", "F"):
                return 2
            return 1

        s = unicodedata.normalize('NFC', s)
        return sum(get_char_display_width(c) for c in s)
示例2
def cursor_left(self, n=1):
        """ESCnD CUB (Cursor Back)

        Moves `self.cursorX` left by *n* columns (never below 0) and then
        invokes every callback registered under `CALLBACK_CURSOR_POS`.

        When the cell landed on holds a wide ('W') character,
        `self.double_width_left` is set so that the next call only clears the
        flag and returns without moving.
        """
        # NOTE: debug logging was removed from this hot path to save CPU.
        n = int(n)
        if self.double_width_left:
            # Second half of a two-call move over a double-width character.
            self.double_width_left = False
            return
        # Clamp at the left edge so the column never goes negative.
        self.cursorX = max(0, self.cursorX - n)
        try:
            landed_on = self.screen[self.cursorY][self.cursorX]
        except IndexError:
            # The cursor is outside the stored screen; treat the cell as blank.
            landed_on = u' '
        if unicodedata.east_asian_width(landed_on) == 'W':
            # The move is issued twice for a 2x-wide glyph; skip the next one.
            self.double_width_left = True
        try:
            for notify in self.callbacks[CALLBACK_CURSOR_POS].values():
                notify()
        except TypeError:
            pass
示例3
def cursor_right(self, n=1):
        """ESCnC CUF (Cursor Forward)

        Moves `self.cursorX` right by *n* columns (a falsy *n* counts as 1)
        and then invokes every callback registered under
        `CALLBACK_CURSOR_POS`.

        When the cell landed on holds a wide ('W') character,
        `self.double_width_right` is set so that the next call only clears
        the flag and returns without moving.
        """
        n = int(n) if n else 1
        if self.double_width_right:
            # Second half of a two-call move over a double-width character.
            self.double_width_right = False
            return
        self.cursorX += n
        try:
            landed_on = self.screen[self.cursorY][self.cursorX]
        except IndexError:
            # The cursor is outside the stored screen; treat the cell as blank.
            landed_on = u' '
        if unicodedata.east_asian_width(landed_on) == 'W':
            # The move is issued twice for a 2x-wide glyph; skip the next one.
            self.double_width_right = True
        try:
            for notify in self.callbacks[CALLBACK_CURSOR_POS].values():
                notify()
        except TypeError:
            pass
示例4
def get_east_asian_width(unicode_str):
    """Return the display width, in columns, of a single character.

    Wide ('W') and Fullwidth ('F') characters occupy two columns (UAX #11).
    Every other East Asian Width class -- Halfwidth, Narrow, Neutral and
    Ambiguous -- is reported as one column; Ambiguous is deliberately kept
    narrow, matching the previous behaviour of this function.

    :param unicode_str: a one-character text string.
    :returns: ``2`` for wide/fullwidth characters, otherwise ``1``.
    """
    if unicodedata.east_asian_width(unicode_str) in ("W", "F"):
        return 2
    return 1
示例5
def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357

        Checks every `unicodedata` lookup for the CJK ideograph U+4E2D.
        """

        import unicodedata

        ideograph = u'\u4e2d'

        # The reported character name depends on the `is_cli` flag.
        if is_cli:
            expected_name = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
        else:
            expected_name = 'CJK UNIFIED IDEOGRAPH-4E2D'
        self.assertEqual(unicodedata.name(ideograph), expected_name)

        # The ideograph has no numeric value: each lookup raises without a
        # default and hands back the default when one is supplied.
        for lookup in (unicodedata.decimal, unicodedata.digit, unicodedata.numeric):
            self.assertRaises(ValueError, lookup, ideograph)
            self.assertEqual(lookup(ideograph, 0), 0)

        self.assertEqual(unicodedata.category(ideograph), 'Lo')
        self.assertEqual(unicodedata.bidirectional(ideograph), 'L')
        self.assertEqual(unicodedata.combining(ideograph), 0)
        self.assertEqual(unicodedata.east_asian_width(ideograph), 'W')
        self.assertEqual(unicodedata.mirrored(ideograph), 0)
        self.assertEqual(unicodedata.decomposition(ideograph), '')
示例6
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `str` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `str` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, str):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例7
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `unicode` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `unicode` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, unicode):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例8
def GetLineWidth(line):
  """Computes how many column positions a line occupies.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    For a Unicode string, the column count after NFC normalization, where
    wide/fullwidth characters count as two columns and combining characters
    count as zero. For anything else, simply len(line).
  """
  if not isinstance(line, unicode):
    return len(line)
  columns = 0
  for char in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(char) in ('W', 'F'):
      columns += 2
    elif not unicodedata.combining(char):
      columns += 1
  return columns
示例9
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `str` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `str` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, str):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例10
def GetLineWidth(line):
  """Computes how many column positions a line occupies.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    For a Unicode string, the column count after NFC normalization, where
    wide/fullwidth characters count as two columns and combining characters
    count as zero. For anything else, simply len(line).
  """
  if not isinstance(line, unicode):
    return len(line)
  columns = 0
  for char in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(char) in ('W', 'F'):
      columns += 2
    elif not unicodedata.combining(char):
      columns += 1
  return columns
示例11
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `str` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `str` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, str):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例12
def cursor_left(self, n=1):
        """ESCnD CUB (Cursor Back)

        Moves `self.cursorX` left by *n* columns (never below 0) and then
        invokes every callback registered under `CALLBACK_CURSOR_POS`.

        When the cell landed on holds a wide ('W') character,
        `self.double_width_left` is set so that the next call only clears the
        flag and returns without moving.
        """
        # NOTE: debug logging was removed from this hot path to save CPU.
        n = int(n)
        if self.double_width_left:
            # Second half of a two-call move over a double-width character.
            self.double_width_left = False
            return
        # Clamp at the left edge so the column never goes negative.
        self.cursorX = max(0, self.cursorX - n)
        try:
            landed_on = self.screen[self.cursorY][self.cursorX]
        except IndexError:
            # The cursor is outside the stored screen; treat the cell as blank.
            landed_on = u' '
        if unicodedata.east_asian_width(landed_on) == 'W':
            # The move is issued twice for a 2x-wide glyph; skip the next one.
            self.double_width_left = True
        try:
            for notify in self.callbacks[CALLBACK_CURSOR_POS].values():
                notify()
        except TypeError:
            pass
示例13
def cursor_right(self, n=1):
        """ESCnC CUF (Cursor Forward)

        Moves `self.cursorX` right by *n* columns (a falsy *n* counts as 1)
        and then invokes every callback registered under
        `CALLBACK_CURSOR_POS`.

        When the cell landed on holds a wide ('W') character,
        `self.double_width_right` is set so that the next call only clears
        the flag and returns without moving.
        """
        n = int(n) if n else 1
        if self.double_width_right:
            # Second half of a two-call move over a double-width character.
            self.double_width_right = False
            return
        self.cursorX += n
        try:
            landed_on = self.screen[self.cursorY][self.cursorX]
        except IndexError:
            # The cursor is outside the stored screen; treat the cell as blank.
            landed_on = u' '
        if unicodedata.east_asian_width(landed_on) == 'W':
            # The move is issued twice for a 2x-wide glyph; skip the next one.
            self.double_width_right = True
        try:
            for notify in self.callbacks[CALLBACK_CURSOR_POS].values():
                notify()
        except TypeError:
            pass
示例14
def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357

        Checks every `unicodedata` lookup for the CJK ideograph U+4E2D.
        """

        import unicodedata

        ideograph = u'\u4e2d'

        # The reported character name depends on the `is_cli` flag.
        if is_cli:
            expected_name = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
        else:
            expected_name = 'CJK UNIFIED IDEOGRAPH-4E2D'
        self.assertEqual(unicodedata.name(ideograph), expected_name)

        # The ideograph has no numeric value: each lookup raises without a
        # default and hands back the default when one is supplied.
        for lookup in (unicodedata.decimal, unicodedata.digit, unicodedata.numeric):
            self.assertRaises(ValueError, lookup, ideograph)
            self.assertEqual(lookup(ideograph, 0), 0)

        self.assertEqual(unicodedata.category(ideograph), 'Lo')
        self.assertEqual(unicodedata.bidirectional(ideograph), 'L')
        self.assertEqual(unicodedata.combining(ideograph), 0)
        self.assertEqual(unicodedata.east_asian_width(ideograph), 'W')
        self.assertEqual(unicodedata.mirrored(ideograph), 0)
        self.assertEqual(unicodedata.decomposition(ideograph), '')
示例15
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `unicode` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `unicode` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, unicode):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例16
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `str` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `str` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, str):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例17
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `str` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `str` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, str):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例18
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `str` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `str` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, str):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例19
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after every double-width character in `self.data`.

        Each `unicode` entry of `self.data` is replaced in place by a copy in
        which every wide ('W') or full-width ('F') East Asian character is
        followed by `pad_char`.  Entries that are not `unicode` are left
        untouched.  Does nothing if `unicodedata.east_asian_width` is
        unavailable.
        """
        char_class = getattr(unicodedata, 'east_asian_width', None)
        if char_class is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, unicode):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if char_class(char) in 'WF':
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)
示例20
def GetLineWidth(line):
  """Computes how many column positions a line occupies.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    For a Unicode string, the column count after NFC normalization, where
    wide/fullwidth characters count as two columns and combining characters
    count as zero. For anything else, simply len(line).
  """
  if not isinstance(line, unicode):
    return len(line)
  columns = 0
  for char in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(char) in ('W', 'F'):
      columns += 2
    elif not unicodedata.combining(char):
      columns += 1
  return columns
示例21
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters. Text strings are measured
    after NFC normalization: wide/fullwidth characters count as two columns
    and combining characters as zero. Any other input (e.g. a byte string)
    is measured with len().
  """
  # type(u'') is `unicode` on Python 2 and `str` on Python 3, so text is
  # measured properly on both. The previous `six.PY2` guard made Python 3
  # fall through to len(), miscounting wide and combining characters.
  if isinstance(line, type(u'')):
    width = 0
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        width += 2
      elif not unicodedata.combining(uc):
        width += 1
    return width
  return len(line)
示例22
def width(ch):
    """
    Look up the display width of a single character.

    The character's East Asian Width class is used as a key into the
    module-level C{_widths} table. This helps with cursor repositioning, but
    it is not entirely reliable because terminal emulators differ in how
    they render such characters.

    @see: U{http://unicode.org/reports/tr11/}

    @return: The width in 1/2 ems of the given single-length unicode string.
    @rtype: C{int}

    @raise KeyError: if C{_widths} has no entry for the character's class.
    """
    eawClass = unicodedata.east_asian_width(ch)
    try:
        cells = _widths[eawClass]
    except KeyError:
        raise KeyError("%r has a width that is not supported: %s"
                       % (ch, eawClass))
    return cells
示例23
def strpad(self, s, width):
        """Fit ``s`` into ``width`` terminal cells, truncating or padding.

        Newlines in ``s`` are rendered as the two characters ``\\n``.  Wide
        ('W') and Fullwidth ('F') East Asian characters count as two cells,
        every other character as one.

        * If ``s`` does not fit, characters are dropped from the end until
          ``self.trunc_char`` fits, any leftover cell is filled with spaces
          and ``self.trunc_char`` is appended.  The marker is assumed to take
          ``len(self.trunc_char)`` cells.  If the marker alone is wider than
          ``width`` it is still emitted whole rather than raising.
        * If ``s`` is shorter than ``width``, it is right-padded with spaces.

        :param s: the text to fit.
        :param width: the target width in cells; values below 1 yield ``''``.
        :returns: the truncated or padded string.
        """
        if width < 1:
            return str()
        if '\n' in s:
            s = s.replace('\n', '\\n')

        def cell_width(ch):
            # Both Wide and Fullwidth characters occupy two cells (UAX #11).
            return 2 if unicodedata.east_asian_width(ch) in ('W', 'F') else 1

        # Collect the longest prefix of `s` that fits into `width` cells.
        kept = []
        kept_width = 0
        for ch in s:
            w = cell_width(ch)
            if kept_width + w > width:
                break
            kept.append(ch)
            kept_width += w

        if len(kept) < len(s):
            # Truncation occurred: make room for the marker.  The `kept`
            # guard stops the loop once nothing is left to drop, which
            # happens when the marker itself is wider than `width`.
            marker_len = len(self.trunc_char)
            while kept and kept_width + marker_len > width:
                kept_width -= cell_width(kept.pop())
            filler = ' ' * (width - kept_width - marker_len)
            return ''.join(kept) + filler + self.trunc_char

        # Everything fit; pad on the right (a no-op when already full).
        return ''.join(kept) + ' ' * (width - kept_width)
示例24
def __cell_len_dw(s):
        """Return the number of character cells a string will take
        (double-width aware). Defined as self._cell_len in __init__

        Wide ('W') and Fullwidth ('F') East Asian characters count as two
        cells; every other character counts as one.
        """
        cells = 0
        for c in s:
            # Both Wide and Fullwidth characters occupy two cells (UAX #11).
            cells += 2 if unicodedata.east_asian_width(c) in ('W', 'F') else 1
        return cells
示例25
def get_line_width(text):
    """Return the display width of `text`.

    The text is NFC-normalized, then each character contributes the value
    stored in `char_width` for its East Asian Width class, or 1 when the
    class has no entry.
    """
    total = 0
    for char in unicodedata.normalize('NFC', text):
        total += char_width.get(unicodedata.east_asian_width(char), 1)
    return total


# XXX unify with _escaped func below 
示例26
def east_asian_len(data, encoding=None, ambiguous_width=1):
        """
        Calculate display width considering unicode East Asian Width

        For `text_type` input, each character contributes the `_EAW_MAP`
        value for its East Asian Width class, or `ambiguous_width` when the
        class has no entry.  Any other input is measured with `len`.
        `encoding` is accepted but not used here.
        """
        if not isinstance(data, text_type):
            return len(data)
        total = 0
        for char in data:
            total += _EAW_MAP.get(east_asian_width(char), ambiguous_width)
        return total
示例27
def east_asian_len(data, encoding=None, ambiguous_width=1):
        """
        Calculate display width considering unicode East Asian Width

        For `text_type` input, the data is first decoded with `encoding`
        (a `UnicodeError` is ignored and the data is used as-is).  Each
        character then contributes the `_EAW_MAP` value for its East Asian
        Width class, or `ambiguous_width` when the class has no entry.  Any
        other input is measured with `len`.
        """
        if not isinstance(data, text_type):
            return len(data)
        try:
            data = data.decode(encoding)
        except UnicodeError:
            # Best effort: measure the undecoded data instead.
            pass
        total = 0
        for char in data:
            total += _EAW_MAP.get(east_asian_width(char), ambiguous_width)
        return total
示例28
def _string_width(string, *, _IS_ASCII=_IS_ASCII):
    """Returns string's width.

    When `_IS_ASCII` matches, the match's `endpos` is returned directly.
    Otherwise every character whose East Asian Width class is 'W', 'F' or
    'A' counts as two columns and every other character as one.
    """
    ascii_match = _IS_ASCII.match(string)
    if ascii_match:
        # NOTE(review): presumably the pattern only matches all-ASCII text,
        # so endpos equals the string length -- confirm against _IS_ASCII.
        return ascii_match.endpos

    wide_classes = 'WFA'
    classify = unicodedata.east_asian_width
    return sum(2 if classify(char) in wide_classes else 1 for char in string)
示例29
def make_character_presentable(c, rp):
    """Convert one character into a printable form and report its width.

    `c` is the character as a list of integer byte values; `rp` supplies
    `pretty_output` and `output_encoding`.  Returns a
    `(list_of_ints, display_width)` tuple.

    With `rp.pretty_output` set, a printable ASCII byte is returned as-is,
    a tab becomes four spaces, and anything else becomes the bytes of its
    `\\xNN` hex escapes.  Otherwise the bytes are decoded and the width comes
    from `get_east_asian_width`; if `get_replacement_char` supplies a
    replacement, the replacement's bytes and summed width are returned.
    """
    if len(c) == 0:
        #  The result of an ignored failed decode from an invalid character.
        return c, 0

    #  A character at this point should be a list of integers.
    for value in c:
        assert type(value) == int

    if rp.pretty_output:
        tab = ord('\t')
        if len(c) == 1 and (31 < c[0] < 127 or c[0] == tab):
            #  Standard ASCII character.
            if c[0] == tab:
                return [ord(u" ")] * 4, 4
            return [c[0]], 1
        #  Extended ASCII character or multi-byte character: hex-escape it.
        escaped = []
        for byte in c:
            hex_escape = b"\\x" + as_byte_string(format(byte, '02X'), rp.output_encoding, "internal")
            escaped.extend(py23_ord(b) for b in hex_escape)
        return escaped, len(escaped)

    #  This is not precise at all, but it is the best that can be done.
    decoded = e_decode(int_array_as_byte_string(c), rp.output_encoding, "internal")
    if len(decoded) == 0:
        #  Happens sometimes due to decode failure on invalid characters.
        return [], 0
    display_width = get_east_asian_width(decoded)
    replacement_chars = get_replacement_char(decoded)
    if replacement_chars is None:
        return c, display_width
    widths = [get_east_asian_width(ch) for ch in replacement_chars]
    encoded = as_byte_string(replacement_chars, rp.output_encoding, "internal")
    return [py23_ord(b) for b in encoded], sum(widths)
示例30
def get_line_width(text):
    """Return the display width of `text`.

    The text is NFC-normalized, then each character contributes the value
    stored in `char_width` for its East Asian Width class, or 1 when the
    class has no entry.
    """
    total = 0
    for char in unicodedata.normalize('NFC', text):
        total += char_width.get(unicodedata.east_asian_width(char), 1)
    return total


# XXX unify with _escaped func below