Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
escprober.py
Go to the documentation of this file.
1
27
28
from
typing
import
Optional, Union
29
30
from
.charsetprober
import
CharSetProber
31
from
.codingstatemachine
import
CodingStateMachine
32
from
.enums
import
LanguageFilter, MachineState, ProbingState
33
from
.escsm
import
(
34
HZ_SM_MODEL,
35
ISO2022CN_SM_MODEL,
36
ISO2022JP_SM_MODEL,
37
ISO2022KR_SM_MODEL,
38
)
39
40
41
class EscCharSetProber(CharSetProber):
    """
    Charset prober based on a "code scheme" approach: encodings are
    identified by the easily recognizable escape or shift sequences
    they rely on.

    One CodingStateMachine is kept per candidate encoding. Which
    candidates are registered depends on the language filter passed to
    the constructor.
    """

    def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
        """Register the state machines enabled by *lang_filter* and reset."""
        super().__init__(lang_filter=lang_filter)
        self.coding_sm = []
        # Each language flag enables one or more state-machine models; the
        # order below is the order in which machines are fed in ``feed``.
        models_by_flag = (
            (LanguageFilter.CHINESE_SIMPLIFIED, (HZ_SM_MODEL, ISO2022CN_SM_MODEL)),
            (LanguageFilter.JAPANESE, (ISO2022JP_SM_MODEL,)),
            (LanguageFilter.KOREAN, (ISO2022KR_SM_MODEL,)),
        )
        for flag, models in models_by_flag:
            if self.lang_filter & flag:
                for model in models:
                    self.coding_sm.append(CodingStateMachine(model))
        self.active_sm_count = 0
        self._detected_charset: Optional[str] = None
        self._detected_language: Optional[str] = None
        self._state = ProbingState.DETECTING
        self.reset()

    def reset(self) -> None:
        """Reactivate every state machine and forget any earlier detection."""
        super().reset()
        for machine in self.coding_sm:
            machine.active = True
            machine.reset()
        # All registered machines are candidates again.
        self.active_sm_count = len(self.coding_sm)
        self._detected_charset = None
        self._detected_language = None

    @property
    def charset_name(self) -> Optional[str]:
        """Charset reported by the matching machine, or None if none matched."""
        return self._detected_charset

    @property
    def language(self) -> Optional[str]:
        """Language reported by the matching machine, or None if none matched."""
        return self._detected_language

    def get_confidence(self) -> float:
        """Return 0.99 once a charset has been detected, otherwise 0.00."""
        if self._detected_charset:
            return 0.99
        return 0.00

    def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
        """
        Push every byte of *byte_str* through each still-active state machine.

        Returns the prober state: immediately after a machine reports a
        match, immediately after the last active machine reports an error,
        or once all input has been consumed.
        """
        for byte in byte_str:
            for machine in self.coding_sm:
                if not machine.active:
                    continue
                outcome = machine.next_state(byte)
                if outcome == MachineState.ITS_ME:
                    # This machine recognised its sequence: record the
                    # result and stop.
                    self._state = ProbingState.FOUND_IT
                    self._detected_charset = machine.get_coding_state_machine()
                    self._detected_language = machine.language
                    return self.state
                if outcome != MachineState.ERROR:
                    continue
                # The machine rejected the input: retire it, and give up
                # altogether once no candidate is left.
                machine.active = False
                self.active_sm_count -= 1
                if self.active_sm_count <= 0:
                    self._state = ProbingState.NOT_ME
                    return self.state

        return self.state
pip._vendor.chardet.charsetprober.CharSetProber
Definition
charsetprober.py:40
pip._vendor.chardet.charsetprober.CharSetProber.lang_filter
lang_filter
Definition
charsetprober.py:47
pip._vendor.chardet.charsetprober.CharSetProber.reset
None reset(self)
Definition
charsetprober.py:50
pip._vendor.chardet.charsetprober.CharSetProber.state
ProbingState state(self)
Definition
charsetprober.py:65
pip._vendor.chardet.charsetprober.CharSetProber._state
_state
Definition
charsetprober.py:45
pip._vendor.chardet.codingstatemachine.CodingStateMachine
Definition
codingstatemachine.py:34
pip._vendor.chardet.escprober.EscCharSetProber
Definition
escprober.py:41
pip._vendor.chardet.escprober.EscCharSetProber.active_sm_count
active_sm_count
Definition
escprober.py:58
pip._vendor.chardet.escprober.EscCharSetProber._detected_charset
_detected_charset
Definition
escprober.py:70
pip._vendor.chardet.escprober.EscCharSetProber._detected_language
_detected_language
Definition
escprober.py:71
pip._vendor.chardet.escprober.EscCharSetProber.reset
None reset(self)
Definition
escprober.py:64
pip._vendor.chardet.escprober.EscCharSetProber.coding_sm
coding_sm
Definition
escprober.py:50
pip._vendor.chardet.escprober.EscCharSetProber.language
Optional[str] language(self)
Definition
escprober.py:78
pip._vendor.chardet.escprober.EscCharSetProber.charset_name
Optional[str] charset_name(self)
Definition
escprober.py:74
pip._vendor.chardet.escprober.EscCharSetProber.__init__
None __init__(self, LanguageFilter lang_filter=LanguageFilter.NONE)
Definition
escprober.py:48
pip._vendor.chardet.escprober.EscCharSetProber.get_confidence
float get_confidence(self)
Definition
escprober.py:81
pip._vendor.chardet.escprober.EscCharSetProber.feed
ProbingState feed(self, Union[bytes, bytearray] byte_str)
Definition
escprober.py:84
pip._vendor.chardet.escprober.EscCharSetProber._state
_state
Definition
escprober.py:61
i
for i
Definition
prime_search.m:10
venv
lib
python3.12
site-packages
pip
_vendor
chardet
escprober.py
Generated by
1.9.8