Python源码示例:locale.getpreferredencoding()
示例1
def test_not_ascii():  # NOCOV
    """Fail when the system's preferred encoding resolves to ``ascii``.

    ``click`` raises a RuntimeError on Python 3 in that situation; the
    problem is described in detail in this gist:
    https://gist.github.com/hackebrot/937245251887197ef542

    The test also guards that ``tox.ini`` explicitly forwards the relevant
    system environment variables to the test environments.
    """
    try:
        codec_info = codecs.lookup(locale.getpreferredencoding())
    except Exception:
        # Any failure to resolve the codec is treated like plain ascii.
        normalized = "ascii"
    else:
        normalized = codec_info.name
    assert normalized != "ascii"
示例2
def get_process_output(process, encoding=None):
    """Return the decoded standard output of a subprocess.

    Args:
        process: Object exposing ``communicate()`` and ``returncode``
            (for example a ``subprocess.Popen`` instance).
        encoding: Codec used to decode the output. When falsy, the encoding
            of ``sys.stdout`` is used, falling back to the locale's
            preferred encoding.

    Returns:
        The decoded stdout text; undecodable bytes are replaced.

    Raises:
        RuntimeError: If the process exited with a non-zero return code.
            The message carries the right-stripped, decoded stdout.
    """
    # communicate() yields None for stdout when the stream was not piped.
    stdout_data = process.communicate()[0] or b""
    if not encoding:
        # sys.stdout may lack ``encoding`` or report None (e.g. when it has
        # been replaced or redirected); neither case raises, so test the
        # value rather than relying on an exception.
        encoding = (getattr(sys.stdout, "encoding", None)
                    or locale.getpreferredencoding())
    if process.returncode != 0:
        raise RuntimeError(
            "Runtime Error: %s"
            % stdout_data.rstrip().decode(encoding, errors='replace'))
    return stdout_data.decode(encoding, errors='replace')
示例3
def filepath_from_subprocess_output(output):
    """
    Turn subprocess `bytes` output into a filesystem-appropriate `str`.

    The bytes are decoded with the locale's preferred encoding (``ascii``
    if none is reported), ``\\r\\n`` is normalised to ``\\n`` and a single
    trailing newline is dropped.

    Inherited from `exec_command`, and possibly incorrect.
    """
    encoding = locale.getpreferredencoding(False)
    if encoding is None:
        encoding = 'ascii'
    text = output.decode(encoding, errors='replace').replace('\r\n', '\n')
    # Another historical oddity: strip exactly one trailing newline.
    if text.endswith('\n'):
        text = text[:-1]
    # stdio uses bytes in python 2, so to avoid issues, all non-ascii
    # characters are replaced there.
    if sys.version_info < (3, 0):
        text = text.encode('ascii', errors='replace')
    return text
示例4
def main(args=None):
    """Parse the command line and dispatch to the selected command.

    Args:
        args: Argument list; defaults to ``sys.argv[1:]`` when ``None``.

    Returns:
        Whatever the selected command's ``main()`` returns.

    Exits the process with status 1 when ``parseopts`` raises ``PipError``.
    """
    import logging

    if args is None:
        args = sys.argv[1:]

    # Configure our deprecation warnings to be sent through loggers
    deprecation.install_warning_logger()

    autocomplete()

    try:
        cmd_name, cmd_args = parseopts(args)
    except PipError as exc:
        sys.stderr.write("ERROR: %s" % exc)
        sys.stderr.write(os.linesep)
        sys.exit(1)

    # Needed for locale.getpreferredencoding(False) to work
    # in pip.utils.encoding.auto_decode
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error as e:
        # setlocale raises when the environment names an unsupported
        # locale; that must not prevent the command from running.
        logging.getLogger(__name__).debug(
            "Ignoring error %s when setting locale", e)
    command = commands_dict[cmd_name](isolated=check_isolated(cmd_args))
    return command.main(cmd_args)
# ###########################################################
# # Writing freeze files
示例5
def read_text_file(filename):
    """Return the contents of *filename* as text.

    The bytes are decoded with utf-8, then the preferred system encoding
    (e.g., cp1252 on some Windows machines), then latin1, in that order.

    Decoding a byte string with latin1 never raises. In the worst case the
    returned string contains some garbage characters.
    """
    with open(filename, 'rb') as fp:
        raw = fp.read()

    for enc in ('utf-8', locale.getpreferredencoding(False)):
        try:
            return raw.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the locale reported a codec name Python does not
            # know; keep going instead of aborting before the fallback.
            continue
    return raw.decode('latin1')
示例6
def run(self, edit, fname=None):
    """Reveal *fname* (or the current selection) via the ``open_dir`` command.

    When *fname* is not given, the index is refreshed and the first selected
    item, joined onto ``self.path``, becomes the target. A status message is
    shown if the target does not exist. On NT with the ``ThisPC\\`` path the
    target is handed to ``explorer /select`` instead.
    """
    path = self.path
    if fname:
        files = True
    else:
        self.index = self.get_all()
        files = self.get_selected(parent=False)
        first = files[0] if files else ''
        fname = join(path, first)

    parent, leaf = os.path.split(fname.rstrip(os.sep))

    if not exists(fname):
        return sublime.status_message(u'Directory doesn’t exist “%s”' % path)

    if NT and path == 'ThisPC\\':
        if not ST3:
            # Non-ST3 builds get the name as bytes in the locale encoding.
            fname = fname.encode(locale.getpreferredencoding(False))
        return subprocess.Popen('explorer /select,"%s"' % fname)

    window = self.view.window()
    if files:
        window.run_command("open_dir", {"dir": parent, "file": leaf})
    else:
        window.run_command("open_dir", {"dir": path})
示例7
def read_text_file(filename):
    """Return the contents of *filename* as text.

    The bytes are decoded with utf-8, then the preferred system encoding
    (e.g., cp1252 on some Windows machines), then latin1, in that order.

    Decoding a byte string with latin1 never raises. In the worst case the
    returned string contains some garbage characters.
    """
    with open(filename, 'rb') as fp:
        raw = fp.read()

    for enc in ('utf-8', locale.getpreferredencoding(False)):
        try:
            return raw.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the locale reported a codec name Python does not
            # know; keep going instead of aborting before the fallback.
            continue
    return raw.decode('latin1')
示例8
def read_text_file(filename):
    """Return the contents of *filename* as text.

    The bytes are decoded with utf-8, then the preferred system encoding
    (e.g., cp1252 on some Windows machines), then latin1, in that order.

    Decoding a byte string with latin1 never raises. In the worst case the
    returned string contains some garbage characters.
    """
    with open(filename, 'rb') as fp:
        raw = fp.read()

    for enc in ('utf-8', locale.getpreferredencoding(False)):
        try:
            return raw.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the locale reported a codec name Python does not
            # know; keep going instead of aborting before the fallback.
            continue
    return raw.decode('latin1')
示例9
def decode_as_string(text, encoding=None):
    """
    Decode console or file output and return it as NFC-normalised unicode.

    *text* should be an encoded byte string; if it is already unicode no
    decoding takes place. When *encoding* is ``None`` the module-level
    ``_console_encoding`` is used, otherwise the given encoding. An explicit
    encoding is needed for SVN --xml output, which should be UTF-8
    (SVN Issue 2938); the discussion on the Subversion DEV list from 2007
    seems to indicate the same.
    """
    chosen = _console_encoding if encoding is None else encoding

    # text should be a byte string; unicode input passes through untouched
    if isinstance(text, unicode):
        decoded = text
    else:
        decoded = text.decode(chosen)

    return unicodedata.normalize('NFC', decoded)
示例10
def secret_set_value(uuid, password, options=None, encode=False, **dargs):
    """
    Build and run a ``secret-set-value`` command.

    :param uuid: secret UUID
    :param password: secret value
    :param options: extra option text appended as ``--<options>`` when truthy
    :param encode: if False, the password is assumed to be base64-encoded
                   already; if True, it is base64-encoded before use.
    :param dargs: keyword arguments forwarded to ``command``
    :return: CmdResult object.
    """
    pieces = ["secret-set-value --secret %s" % uuid]

    if password:
        if encode:
            charset = locale.getpreferredencoding()
            raw = password.encode(charset)
            value = base64.b64encode(raw).decode(charset)
        else:
            value = password
        pieces.append(" --base64 %s" % value)

    if options:
        pieces.append(" --%s" % options)

    return command("".join(pieces), **dargs)
示例11
def _construct_parser(self, fname):
    # type: (str) -> RawConfigParser
    """Return a RawConfigParser, loaded from *fname* when that file exists.

    A parser is created even when the file is missing so that it can hold
    data; this is useful when modifying and saving files.

    Raises ConfigurationError if the file cannot be decoded.
    """
    parser = configparser.RawConfigParser()

    # No such file: nothing to read, hand back the empty parser.
    if not os.path.exists(fname):
        return parser

    try:
        parser.read(fname)
    except UnicodeDecodeError:
        message = (
            "ERROR: "
            "Configuration file contains invalid %s characters.\n"
            "Please fix your configuration, located at %s\n"
        ) % (locale.getpreferredencoding(False), fname)
        raise ConfigurationError(message)
    return parser
示例12
def main(args=None):
    """Parse the command line and run the selected command.

    *args* defaults to ``sys.argv[1:]``. The process exits with status 1
    when ``parseopts`` raises ``PipError``; otherwise the return value of
    the command's ``main()`` is returned.
    """
    args = sys.argv[1:] if args is None else args

    # Route deprecation warnings through the loggers.
    deprecation.install_warning_logger()

    autocomplete()

    try:
        cmd_name, cmd_args = parseopts(args)
    except PipError as exc:
        sys.stderr.write("ERROR: %s" % exc)
        sys.stderr.write(os.linesep)
        sys.exit(1)

    # Needed for locale.getpreferredencoding(False) to work
    # in pip._internal.utils.encoding.auto_decode
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error as e:
        # setlocale can apparently crash if locale are uninitialized
        logger.debug("Ignoring error %s when setting locale", e)

    command_cls = commands_dict[cmd_name]
    isolated = check_isolated(cmd_args)
    command = command_cls(isolated=isolated)
    return command.main(cmd_args)
示例13
def read_text_file(filename):
    """Return the contents of *filename* as text.

    The bytes are decoded with utf-8, then the preferred system encoding
    (e.g., cp1252 on some Windows machines), then latin1, in that order.

    Decoding a byte string with latin1 never raises. In the worst case the
    returned string contains some garbage characters.
    """
    with open(filename, 'rb') as fp:
        raw = fp.read()

    for enc in ('utf-8', locale.getpreferredencoding(False)):
        try:
            return raw.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the locale reported a codec name Python does not
            # know; keep going instead of aborting before the fallback.
            continue
    return raw.decode('latin1')
示例14
def read_text_file(filename):
    """Return the contents of *filename* as text.

    The bytes are decoded with utf-8, then the preferred system encoding
    (e.g., cp1252 on some Windows machines), then latin1, in that order.

    Decoding a byte string with latin1 never raises. In the worst case the
    returned string contains some garbage characters.
    """
    with open(filename, 'rb') as fp:
        raw = fp.read()

    for enc in ('utf-8', locale.getpreferredencoding(False)):
        try:
            return raw.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the locale reported a codec name Python does not
            # know; keep going instead of aborting before the fallback.
            continue
    return raw.decode('latin1')
示例15
def read_text_file(filename):
    """Return the contents of *filename* as text.

    The bytes are decoded with utf-8, then the preferred system encoding
    (e.g., cp1252 on some Windows machines), then latin1, in that order.

    Decoding a byte string with latin1 never raises. In the worst case the
    returned string contains some garbage characters.
    """
    with open(filename, 'rb') as fp:
        raw = fp.read()

    for enc in ('utf-8', locale.getpreferredencoding(False)):
        try:
            return raw.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the locale reported a codec name Python does not
            # know; keep going instead of aborting before the fallback.
            continue
    return raw.decode('latin1')
示例16
def lnpgettext(self, context, singular, plural, num):
    """Equivalent to ``npgettext()``, but the translation is returned
    encoded: in the charset set via ``bind_textdomain_codeset()`` if any,
    otherwise in the preferred system encoding.

    When the catalog has no entry, the fallback catalog is consulted if one
    is set; otherwise *singular* is returned for ``num == 1`` and *plural*
    for any other count.
    """
    msg_key = self.CONTEXT_ENCODING % (context, singular)
    try:
        translated = self._catalog[(msg_key, self.plural(num))]
        charset = self._output_charset or locale.getpreferredencoding()
        return translated.encode(charset)
    except KeyError:
        fallback = self._fallback
        if fallback:
            return fallback.lnpgettext(context, singular, plural, num)
        return singular if num == 1 else plural
示例17
def _construct_parser(self, fname):
    # type: (str) -> RawConfigParser
    """Return a RawConfigParser, loaded from *fname* when that file exists.

    A parser is created even when the file is missing so that it can hold
    data; this is useful when modifying and saving files.

    Raises ConfigurationError if the file cannot be decoded.
    """
    parser = configparser.RawConfigParser()

    # No such file: nothing to read, hand back the empty parser.
    if not os.path.exists(fname):
        return parser

    try:
        parser.read(fname)
    except UnicodeDecodeError:
        message = (
            "ERROR: "
            "Configuration file contains invalid %s characters.\n"
            "Please fix your configuration, located at %s\n"
        ) % (locale.getpreferredencoding(False), fname)
        raise ConfigurationError(message)
    return parser
示例18
def start(self, no_delay):
    """Initialise the curses screen and record the locale's encoding.

    Args:
        no_delay: Passed to ``window.nodelay()``.

    Sets ``self.window`` to the curses screen and ``self.code`` to the
    preferred encoding of the user's locale.
    """
    # The locale must be set before curses is initialised: ncurses uses the
    # locale the calling program has set up to decide how characters are
    # handled, and falls back to ISO-8859-1 assumptions otherwise.
    locale.setlocale(locale.LC_ALL, '')  # set your locale
    self.code = locale.getpreferredencoding()

    self.window = curses.initscr()
    curses.start_color()
    curses.use_default_colors()
    curses.noecho()
    curses.cbreak()
    curses.curs_set(0)
    self.window.nodelay(no_delay)
    self.init_colors()
    self.window.bkgd(curses.color_pair(self.WHITE))
示例19
def test_read_text_file(smb_share):
    """Opening a file in default (text) mode yields a read-only TextIOWrapper.

    Covers the missing-file error, the wrapper's attributes, reading to EOF,
    ``readlines()`` after a seek, and that writing raises IOError.
    """
    file_path = "%s\\%s" % (smb_share, "file.txt")
    file_contents = u"File Contents\nNewline"

    missing_msg = "[NtStatus 0xc0000034] No such file or directory"
    with pytest.raises(SMBOSError, match=re.escape(missing_msg)):
        smbclient.open_file(file_path, mode='rb')

    with smbclient.open_file(file_path, mode='wb') as fd:
        fd.write(file_contents.encode('utf-8'))

    with smbclient.open_file(file_path) as fd:
        assert isinstance(fd, io.TextIOWrapper)
        assert fd.closed is False
        assert fd.encoding == locale.getpreferredencoding()
        assert fd.errors == 'strict'
        assert fd.line_buffering is False
        assert fd.name == file_path
        assert fd.newlines is None

        assert fd.read() == file_contents
        # A second read at EOF returns the empty string.
        assert fd.read() == ""

        fd.seek(0)
        read_lines = fd.readlines()
        pieces = file_contents.split("\n")
        # Every line but the last keeps its newline terminator.
        wanted = [piece + "\n" for piece in pieces[:-1]] + pieces[-1:]
        assert read_lines == wanted

        assert int(fd.tell()) == len(file_contents)

        with pytest.raises(IOError):
            fd.write(u"Fail")

    assert fd.closed
示例20
def write(self, file_or_filename,
          encoding=None,
          xml_declaration=None,
          default_namespace=None,
          method=None):
    """Serialise the tree rooted at ``self._root`` to *file_or_filename*.

    *method* defaults to "xml" and must be a key of ``_serialize``,
    otherwise ValueError is raised. *encoding* is lower-cased when given;
    it defaults to "utf-8" for the "c14n" method and "us-ascii" otherwise.
    For the "xml" method a declaration is written when *xml_declaration*
    is true, or when it is None and the encoding is not one of "utf-8",
    "us-ascii" or "unicode".
    """
    if not method:
        method = "xml"
    elif method not in _serialize:
        raise ValueError("unknown method %r" % method)

    if encoding:
        encoding = encoding.lower()
    elif method == "c14n":
        encoding = "utf-8"
    else:
        encoding = "us-ascii"

    with _get_writer(file_or_filename, encoding) as emit:
        if xml_declaration is None:
            wants_declaration = encoding not in ("utf-8", "us-ascii",
                                                 "unicode")
        else:
            wants_declaration = xml_declaration

        if method == "xml" and wants_declaration:
            if encoding == "unicode":
                # Retrieve the default encoding for the xml declaration
                import locale
                declared_encoding = locale.getpreferredencoding()
            else:
                declared_encoding = encoding
            emit("<?xml version='1.0' encoding='%s'?>\n" % (
                declared_encoding,))

        if method == "text":
            _serialize_text(emit, self._root)
        else:
            qnames, namespaces = _namespaces(self._root, default_namespace)
            serializer = _serialize[method]
            serializer(emit, self._root, qnames, namespaces)
示例21
def detect_encoding(data=None):
    """Return the default system encoding, or the first encoding that
    decodes every item of *data*.

    Args:
        data - iterable of objects with a ``decode`` method; optional

    Returns:
        enc - the system encoding when *data* is None, otherwise the first
              candidate encoding that decodes all items. When no candidate
              works a message is printed and None is returned.
    """
    candidates = ['utf-8', 'latin-1', 'iso8859-1', 'iso8859-2',
                  'utf-16', 'cp720']
    system_enc = locale.getpreferredencoding(False)

    if data is None:
        return system_enc

    # Try the system encoding first when it is not already a candidate.
    lowered = system_enc.lower()
    if lowered not in candidates:
        candidates.insert(0, lowered)

    for candidate in candidates:
        try:
            for line in data:
                line.decode(candidate)
        except (UnicodeDecodeError, UnicodeError, AttributeError):
            continue
        else:
            return candidate

    print("Encoding not detected. Please pass encoding value manually")
示例22
def _decode_stdoutdata(stdoutdata):
""" Convert data read from stdout/stderr to unicode """
if not isinstance(stdoutdata, bytes):
return stdoutdata
encoding = getattr(sys.__stdout__, "encoding", locale.getpreferredencoding())
if encoding is None:
return stdoutdata.decode()
return stdoutdata.decode(encoding)
##########################################################################
# Import Stdlib Module
##########################################################################
示例23
def _on_finished(self, code, status):
    """Show a message when the process finished.

    Decodes the process's stderr and stdout with the locale's preferred
    encoding, optionally shows them as messages, reports the exit state and
    stores the formatted result in ``qutescheme.spawn_output``.
    """
    self._started = False
    log.procs.debug(
        "Process finished with code {}, status {}.".format(code, status))

    charset = locale.getpreferredencoding(do_setlocale=False)
    raw_err = self._proc.readAllStandardError().data()
    stderr = raw_err.decode(charset, 'replace')
    raw_out = self._proc.readAllStandardOutput().data()
    stdout = raw_out.decode(charset, 'replace')

    if self._output_messages:
        if stdout:
            message.info(stdout.strip())
        if stderr:
            message.error(stderr.strip())

    crashed = status == QProcess.CrashExit
    succeeded = status == QProcess.NormalExit and code == 0

    if crashed:
        exitinfo = "{} crashed!".format(self._what.capitalize())
        message.error(exitinfo)
    elif succeeded:
        exitinfo = "{} exited successfully.".format(
            self._what.capitalize())
        if self.verbose:
            message.info(exitinfo)
    else:
        assert status == QProcess.NormalExit
        # The user-facing text says 'status' because that reads more
        # naturally - the number shown is actually 'code'.
        template = "{} exited with status {}, see :messages for details."
        exitinfo = template.format(self._what.capitalize(), code)
        message.error(exitinfo)

        if stdout:
            log.procs.error("Process stdout:\n" + stdout.strip())
        if stderr:
            log.procs.error("Process stderr:\n" + stderr.strip())

    qutescheme.spawn_output = self._spawn_format(exitinfo, stdout, stderr)
示例24
def _get_encoding(encoding, stream):
encoding = encoding or getattr(stream, 'encoding', None)
if not encoding:
import locale
encoding = locale.getpreferredencoding()
return encoding
示例25
def encode(self, string):
    """Encode text for the output stream; non-text input passes through.

    The encoding is taken from ``self.out.encoding`` when set, otherwise
    from the locale's preferred encoding, otherwise from
    ``sys.getdefaultencoding()``.
    """
    if isinstance(string, six.text_type):
        target = getattr(self.out, 'encoding', None)
        if not target:
            target = locale.getpreferredencoding(do_setlocale=False)
        if not target:
            target = sys.getdefaultencoding()
        # errors=replace: showing a source line that cannot be encoded with
        # the current locale settings must not crash.
        return string.encode(target, 'replace')
    return string
示例26
def decode_default(bytes):
    """Decode a byte string by trying several candidate encodings in order.

    Candidates, in priority order: the encoding of ``sys.stdout``, the
    locale's preferred encoding, chardet's guess, utf-8 and latin1.
    Candidates that are unset or equal to ``'ascii'`` are skipped.

    Args:
        bytes: The value to decode; must be of type ``str`` exactly.

    Returns:
        The decoded text from the first candidate that succeeds. If none
        does, the input decoded as ascii with undecodable bytes replaced.

    Raises:
        NotBytesException: If ``type(bytes)`` is not ``str``.
    """
    if type(bytes) is not str:
        raise NotBytesException(bytes)
    guess = chardet.detect(bytes)
    # A sequence, not a dict: the order is the priority, and dict iteration
    # order is not guaranteed on older interpreters, where latin1 (which
    # never fails) could otherwise be tried before the better guesses.
    candidates = (
        getattr(sys.stdout, 'encoding', None),
        locale.getpreferredencoding(),
        guess['encoding'],
        'utf-8',
        'latin1',
    )
    for encoding in candidates:
        if encoding and encoding != 'ascii':
            try:
                return bytes.decode(encoding)
            except (UnicodeDecodeError, LookupError):
                # LookupError: a reported codec name Python does not know.
                continue
    # errors='replace' cannot raise UnicodeDecodeError, so no handler here.
    return bytes.decode('ascii', errors='replace')
示例27
def test_utf8_file_nodtype_unicode(self):
    """genfromtxt with ``dtype=None`` upcasts non-latin1 text to unicode.

    Skipped when the test string cannot be encoded with the preferred
    encoding, which is assumed to be the default encoding of ``io.open``.
    Will need to change this for PyTest, maybe using
    pytest.mark.xfail(raises=***).
    """
    # bytes encoding with non-latin1 -> unicode upcast
    utf8 = u'\u03d6'
    latin1 = u'\xf6\xfc\xf6'

    try:
        encoding = locale.getpreferredencoding()
        utf8.encode(encoding)
    except (UnicodeError, ImportError):
        pytest.skip('Skipping test_utf8_file_nodtype_unicode, '
                    'unable to encode utf8 in preferred encoding')

    rows = (
        u"norm1,norm2,norm3\n",
        u"norm1," + latin1 + u",norm3\n",
        u"test1,testNonethe" + utf8 + u",test3\n",
    )

    with temppath() as path:
        with io.open(path, "wt") as f:
            for row in rows:
                f.write(row)

        with warnings.catch_warnings(record=True) as w:
            warnings.filterwarnings('always', '',
                                    np.VisibleDeprecationWarning)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',')
            # Check for warning when encoding not specified.
            assert_(w[0].category is np.VisibleDeprecationWarning)

        ctl = np.array([
            ["norm1", "norm2", "norm3"],
            ["norm1", latin1, "norm3"],
            ["test1", "testNonethe" + utf8, "test3"]],
            dtype=np.unicode)
        assert_array_equal(test, ctl)
示例28
def detect_console_encoding():
    """
    Try to find the most capable encoding supported by the console.

    Looks at stdout/stdin first, then at the locale's preferred encoding,
    and finally at ``sys.getdefaultencoding()``. Slightly modified from the
    way IPython handles the same issue.
    """
    global _initial_defencoding

    def _is_weak(name):
        # Missing, or some flavour of ascii: worth looking further.
        return not name or 'ascii' in name.lower()

    encoding = None
    try:
        encoding = sys.stdout.encoding or sys.stdin.encoding
    except (AttributeError, IOError):
        pass

    # try again for something better
    if _is_weak(encoding):
        try:
            encoding = locale.getpreferredencoding()
        except Exception:
            pass

    # when all else fails. this will usually be "ascii"
    if _is_weak(encoding):
        encoding = sys.getdefaultencoding()

    # GH3360, save the reported defencoding at import time
    # MPL backends may change it. Make available for debugging.
    if not _initial_defencoding:
        _initial_defencoding = sys.getdefaultencoding()

    return encoding
示例29
def main(args=None):
    """Parse the command line and run the selected command.

    *args* defaults to ``sys.argv[1:]``. The process exits with status 1
    when ``parseopts`` raises ``PipError``; otherwise the return value of
    the command's ``main()`` is returned.
    """
    args = sys.argv[1:] if args is None else args

    # Route deprecation warnings through the loggers.
    deprecation.install_warning_logger()

    autocomplete()

    try:
        cmd_name, cmd_args = parseopts(args)
    except PipError as exc:
        sys.stderr.write("ERROR: %s" % exc)
        sys.stderr.write(os.linesep)
        sys.exit(1)

    # Needed for locale.getpreferredencoding(False) to work
    # in pip.utils.encoding.auto_decode
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error as e:
        # setlocale can apparently crash if locale are uninitialized
        logger.debug("Ignoring error %s when setting locale", e)

    command_cls = commands_dict[cmd_name]
    isolated = check_isolated(cmd_args)
    command = command_cls(isolated=isolated)
    return command.main(cmd_args)
# ###########################################################
# # Writing freeze files
示例30
def auto_decode(data):
    """Decode a bytes string, detecting the encoding where possible.

    A leading BOM listed in ``BOMS`` decides the encoding and is stripped.
    Otherwise the first two lines are checked for a PEP 263 style coding
    comment. Failing that, the data is decoded with
    ``locale.getpreferredencoding(False)`` like ``open()`` on Python 3,
    or with ``sys.getdefaultencoding()`` if the locale reports nothing.
    """
    import sys

    for bom, encoding in BOMS:
        if data.startswith(bom):
            return data[len(bom):].decode(encoding)
    # Lets check the first two lines as in PEP263
    for line in data.split(b'\n')[:2]:
        if line[0:1] != b'#':
            continue
        # Search once and reuse the match instead of running the regex twice.
        match = ENCODING_RE.search(line)
        if match:
            encoding = match.groups()[0].decode('ascii')
            return data.decode(encoding)
    return data.decode(
        locale.getpreferredencoding(False) or sys.getdefaultencoding()
    )