Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
pip._vendor.chardet Namespace Reference

Namespaces

namespace  big5freq
 
namespace  big5prober
 
namespace  chardistribution
 
namespace  charsetgroupprober
 
namespace  charsetprober
 
namespace  cli
 
namespace  codingstatemachine
 
namespace  codingstatemachinedict
 
namespace  cp949prober
 
namespace  enums
 
namespace  escprober
 
namespace  escsm
 
namespace  eucjpprober
 
namespace  euckrfreq
 
namespace  euckrprober
 
namespace  euctwfreq
 
namespace  euctwprober
 
namespace  gb2312freq
 
namespace  gb2312prober
 
namespace  hebrewprober
 
namespace  jisfreq
 
namespace  johabfreq
 
namespace  johabprober
 
namespace  jpcntx
 
namespace  langbulgarianmodel
 
namespace  langgreekmodel
 
namespace  langhebrewmodel
 
namespace  langhungarianmodel
 
namespace  langrussianmodel
 
namespace  langthaimodel
 
namespace  langturkishmodel
 
namespace  latin1prober
 
namespace  macromanprober
 
namespace  mbcharsetprober
 
namespace  mbcsgroupprober
 
namespace  mbcssm
 
 
namespace  resultdict
 
namespace  sbcharsetprober
 
namespace  sbcsgroupprober
 
namespace  sjisprober
 
namespace  universaldetector
 
namespace  utf1632prober
 
namespace  utf8prober
 
namespace  version
 

Functions

ResultDict detect (Union[bytes, bytearray] byte_str, bool should_rename_legacy=False)
 
List[ResultDict] detect_all (Union[bytes, bytearray] byte_str, bool ignore_threshold=False, bool should_rename_legacy=False)
 

Function Documentation

◆ detect()

ResultDict detect ( Union[bytes, bytearray] byte_str,
bool   should_rename_legacy = False 
)
Detect the encoding of the given byte string.

:param byte_str:     The byte sequence to examine.
:type byte_str:      ``bytes`` or ``bytearray``
:param should_rename_legacy:  Should we rename legacy encodings
                              to their more modern equivalents?
:type should_rename_legacy:   ``bool``

Definition at line 30 of file __init__.py.

32) -> ResultDict:
33 """
34 Detect the encoding of the given byte string.
35
36 :param byte_str: The byte sequence to examine.
37 :type byte_str: ``bytes`` or ``bytearray``
38 :param should_rename_legacy: Should we rename legacy encodings
39 to their more modern equivalents?
40 :type should_rename_legacy: ``bool``
41 """
42 if not isinstance(byte_str, bytearray):
43 if not isinstance(byte_str, bytes):
44 raise TypeError(
45 f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
46 )
47 byte_str = bytearray(byte_str)
48 detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
49 detector.feed(byte_str)
50 return detector.close()
51
52
for i

References i.

◆ detect_all()

List[ResultDict] detect_all ( Union[bytes, bytearray] byte_str,
bool   ignore_threshold = False,
bool   should_rename_legacy = False 
)
Detect all the possible encodings of the given byte string.

:param byte_str:          The byte sequence to examine.
:type byte_str:           ``bytes`` or ``bytearray``
:param ignore_threshold:  Include encodings that are below
                          ``UniversalDetector.MINIMUM_THRESHOLD``
                          in results.
:type ignore_threshold:   ``bool``
:param should_rename_legacy:  Should we rename legacy encodings
                              to their more modern equivalents?
:type should_rename_legacy:   ``bool``

Definition at line 53 of file __init__.py.

57) -> List[ResultDict]:
58 """
59 Detect all the possible encodings of the given byte string.
60
61 :param byte_str: The byte sequence to examine.
62 :type byte_str: ``bytes`` or ``bytearray``
63 :param ignore_threshold: Include encodings that are below
64 ``UniversalDetector.MINIMUM_THRESHOLD``
65 in results.
66 :type ignore_threshold: ``bool``
67 :param should_rename_legacy: Should we rename legacy encodings
68 to their more modern equivalents?
69 :type should_rename_legacy: ``bool``
70 """
71 if not isinstance(byte_str, bytearray):
72 if not isinstance(byte_str, bytes):
73 raise TypeError(
74 f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
75 )
76 byte_str = bytearray(byte_str)
77
78 detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
79 detector.feed(byte_str)
81
83 results: List[ResultDict] = []
84 probers: List[CharSetProber] = []
85 for prober in detector.charset_probers:
86 if isinstance(prober, CharSetGroupProber):
88 else:
89 probers.append(prober)
90 for prober in probers:
91 if ignore_threshold or prober.get_confidence() > detector.MINIMUM_THRESHOLD:
92 charset_name = prober.charset_name or ""
93 lower_charset_name = charset_name.lower()
94 # Use Windows encoding name instead of ISO-8859 if we saw any
95 # extra Windows-specific bytes
97 charset_name = detector.ISO_WIN_MAP.get(
98 lower_charset_name, charset_name
99 )
100 # Rename legacy encodings with superset encodings if asked
101 if should_rename_legacy:
102 charset_name = detector.LEGACY_MAP.get(
103 charset_name.lower(), charset_name
104 )
106 {
107 "encoding": charset_name,
108 "confidence": prober.get_confidence(),
109 "language": prober.language,
110 }
111 )
112 if len(results) > 0:
113 return sorted(results, key=lambda result: -result["confidence"])
114
115 return [detector.result]

References i.