Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
eucjpprober.py
Go to the documentation of this file.
1
27
28
from
typing
import
Union
29
30
from
.chardistribution
import
EUCJPDistributionAnalysis
31
from
.codingstatemachine
import
CodingStateMachine
32
from
.enums
import
MachineState, ProbingState
33
from
.jpcntx
import
EUCJPContextAnalysis
34
from
.mbcharsetprober
import
MultiByteCharSetProber
35
from
.mbcssm
import
EUCJP_SM_MODEL
36
37
38
class EUCJPProber(MultiByteCharSetProber):
    """Charset prober that scores a byte stream as EUC-JP encoded Japanese.

    Three collaborators are driven in lock-step by :meth:`feed`:

    * ``coding_sm`` -- a ``CodingStateMachine`` built from ``EUCJP_SM_MODEL``
      that validates the byte sequence,
    * ``distribution_analyzer`` -- an ``EUCJPDistributionAnalysis``,
    * ``context_analyzer`` -- an ``EUCJPContextAnalysis``.

    The reported confidence is the larger of the two analyzers' confidences.
    """

    def __init__(self) -> None:
        """Create the state machine and both analyzers, then reset the prober."""
        super().__init__()
        self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
        self.distribution_analyzer = EUCJPDistributionAnalysis()
        self.context_analyzer = EUCJPContextAnalysis()
        # reset() touches context_analyzer, so it must run after the
        # attributes above have been assigned.
        self.reset()

    def reset(self) -> None:
        """Reset the inherited prober state and the context analyzer."""
        super().reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self) -> str:
        """Name of the charset this prober detects."""
        return "EUC-JP"

    @property
    def language(self) -> str:
        """Language associated with this prober."""
        return "Japanese"

    def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
        """Consume the next chunk of input and return the updated state.

        Every byte is pushed through the coding state machine:

        * ``MachineState.ERROR`` marks the prober ``NOT_ME`` and stops,
        * ``MachineState.ITS_ME`` marks it ``FOUND_IT`` and stops,
        * ``MachineState.START`` means a character just completed; its
          bytes are handed to both analyzers.

        After the loop, while the prober is still ``DETECTING``, it is
        promoted to ``FOUND_IT`` once the context analyzer reports enough
        data and the confidence exceeds ``SHORTCUT_THRESHOLD``.

        An empty ``byte_str`` is accepted and leaves the carried-over byte
        untouched (previously it raised ``IndexError``).

        Args:
            byte_str: The chunk of raw bytes to analyse.

        Returns:
            The prober state after processing this chunk.
        """
        assert self.coding_sm is not None
        assert self.distribution_analyzer is not None

        for i, byte in enumerate(byte_str):
            # PY3K: byte_str is a byte array, so byte is an int, not a byte
            coding_state = self.coding_sm.next_state(byte)
            if coding_state == MachineState.ERROR:
                self.logger.debug(
                    "%s %s prober hit error at byte %s",
                    self.charset_name,
                    self.language,
                    i,
                )
                self._state = ProbingState.NOT_ME
                break
            if coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            if coding_state == MachineState.START:
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # The character ends on the first byte of this chunk:
                    # pair it with the byte saved from the previous call
                    # (held in slot 0 of _last_char).
                    self._last_char[1] = byte
                    self.context_analyzer.feed(self._last_char, char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    # Hand over the current byte together with its predecessor.
                    self.context_analyzer.feed(byte_str[i - 1 : i + 1], char_len)
                    self.distribution_analyzer.feed(
                        byte_str[i - 1 : i + 1], char_len
                    )

        # Remember the final byte so a character that straddles two feed()
        # calls can be reassembled. Guarded because indexing an empty chunk
        # with [-1] raises IndexError.
        if byte_str:
            self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if self.context_analyzer.got_enough_data() and (
                self.get_confidence() > self.SHORTCUT_THRESHOLD
            ):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self) -> float:
        """Return the higher of the context and distribution confidences."""
        assert self.distribution_analyzer is not None

        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
pip._vendor.chardet.chardistribution.EUCJPDistributionAnalysis
Definition
chardistribution.py:246
pip._vendor.chardet.charsetprober.CharSetProber.logger
logger
Definition
charsetprober.py:48
pip._vendor.chardet.charsetprober.CharSetProber.reset
None reset(self)
Definition
charsetprober.py:50
pip._vendor.chardet.charsetprober.CharSetProber.state
ProbingState state(self)
Definition
charsetprober.py:65
pip._vendor.chardet.charsetprober.CharSetProber.SHORTCUT_THRESHOLD
float SHORTCUT_THRESHOLD
Definition
charsetprober.py:42
pip._vendor.chardet.charsetprober.CharSetProber.language
Optional[str] language(self)
Definition
charsetprober.py:58
pip._vendor.chardet.charsetprober.CharSetProber.charset_name
Optional[str] charset_name(self)
Definition
charsetprober.py:54
pip._vendor.chardet.charsetprober.CharSetProber.get_confidence
float get_confidence(self)
Definition
charsetprober.py:68
pip._vendor.chardet.charsetprober.CharSetProber._state
_state
Definition
charsetprober.py:45
pip._vendor.chardet.codingstatemachine.CodingStateMachine
Definition
codingstatemachine.py:34
pip._vendor.chardet.eucjpprober.EUCJPProber
Definition
eucjpprober.py:38
pip._vendor.chardet.eucjpprober.EUCJPProber.charset_name
str charset_name(self)
Definition
eucjpprober.py:51
pip._vendor.chardet.eucjpprober.EUCJPProber.reset
None reset(self)
Definition
eucjpprober.py:46
pip._vendor.chardet.eucjpprober.EUCJPProber._last_char
_last_char
Definition
eucjpprober.py:81
pip._vendor.chardet.eucjpprober.EUCJPProber.language
language
Definition
eucjpprober.py:69
pip._vendor.chardet.eucjpprober.EUCJPProber.coding_sm
coding_sm
Definition
eucjpprober.py:41
pip._vendor.chardet.eucjpprober.EUCJPProber.distribution_analyzer
distribution_analyzer
Definition
eucjpprober.py:42
pip._vendor.chardet.eucjpprober.EUCJPProber.__init__
None __init__(self)
Definition
eucjpprober.py:39
pip._vendor.chardet.eucjpprober.EUCJPProber.language
str language(self)
Definition
eucjpprober.py:55
pip._vendor.chardet.eucjpprober.EUCJPProber.state
state
Definition
eucjpprober.py:89
pip._vendor.chardet.eucjpprober.EUCJPProber.get_confidence
float get_confidence(self)
Definition
eucjpprober.py:97
pip._vendor.chardet.eucjpprober.EUCJPProber.charset_name
charset_name
Definition
eucjpprober.py:68
pip._vendor.chardet.eucjpprober.EUCJPProber.feed
ProbingState feed(self, Union[bytes, bytearray] byte_str)
Definition
eucjpprober.py:58
pip._vendor.chardet.eucjpprober.EUCJPProber._state
_state
Definition
eucjpprober.py:72
pip._vendor.chardet.eucjpprober.EUCJPProber.context_analyzer
context_analyzer
Definition
eucjpprober.py:43
pip._vendor.chardet.jpcntx.EUCJPContextAnalysis
Definition
jpcntx.py:219
pip._vendor.chardet.mbcharsetprober.MultiByteCharSetProber
Definition
mbcharsetprober.py:38
pip._vendor.chardet.mbcharsetprober.MultiByteCharSetProber.reset
None reset(self)
Definition
mbcharsetprober.py:49
pip._vendor.chardet.mbcharsetprober.MultiByteCharSetProber._last_char
_last_char
Definition
mbcharsetprober.py:47
pip._vendor.chardet.mbcharsetprober.MultiByteCharSetProber.language
language
Definition
mbcharsetprober.py:67
pip._vendor.chardet.mbcharsetprober.MultiByteCharSetProber.state
state
Definition
mbcharsetprober.py:85
pip._vendor.chardet.mbcharsetprober.MultiByteCharSetProber.get_confidence
float get_confidence(self)
Definition
mbcharsetprober.py:93
pip._vendor.chardet.mbcharsetprober.MultiByteCharSetProber.charset_name
charset_name
Definition
mbcharsetprober.py:66
pip._vendor.chardet.mbcharsetprober.MultiByteCharSetProber._state
_state
Definition
mbcharsetprober.py:70
i
for i
Definition
prime_search.m:10
venv
lib
python3.12
site-packages
pip
_vendor
chardet
eucjpprober.py
Generated by
1.9.8