Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
utf8prober.py
Go to the documentation of this file.
1
27
28
from
typing
import
Union
29
30
from
.charsetprober
import
CharSetProber
31
from
.codingstatemachine
import
CodingStateMachine
32
from
.enums
import
MachineState, ProbingState
33
from
.mbcssm
import
UTF8_SM_MODEL
34
35
36
class UTF8Prober(CharSetProber):
    """Charset prober that decides whether a byte stream is UTF-8.

    Bytes are pushed through a ``CodingStateMachine`` built from
    ``UTF8_SM_MODEL``.  Alongside the state machine's verdict the prober
    keeps a count of multi-byte characters, which drives the confidence
    value returned by :meth:`get_confidence`.
    """

    # Base of the exponent applied per counted multi-byte character in
    # get_confidence().
    ONE_CHAR_PROB = 0.5

    def __init__(self) -> None:
        """Create the state machine and put the prober in its initial state."""
        super().__init__()
        self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
        # Declared here so the attribute exists before reset() assigns it.
        self._num_mb_chars = 0
        self.reset()

    def reset(self) -> None:
        """Reset the base prober, the state machine and the character count."""
        super().reset()
        self.coding_sm.reset()
        self._num_mb_chars = 0

    @property
    def charset_name(self) -> str:
        """Name of the charset this prober detects."""
        return "utf-8"

    @property
    def language(self) -> str:
        """Language associated with this prober; always the empty string."""
        return ""

    def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
        """Consume *byte_str* and return the resulting probing state.

        Each byte is handed to the coding state machine.  Processing stops
        early as soon as the machine reports ``MachineState.ERROR`` (state
        becomes ``NOT_ME``) or ``MachineState.ITS_ME`` (state becomes
        ``FOUND_IT``).
        """
        for byte in byte_str:
            machine_state = self.coding_sm.next_state(byte)
            if machine_state == MachineState.ERROR:
                self._state = ProbingState.NOT_ME
                break
            elif machine_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif (
                machine_state == MachineState.START
                and self.coding_sm.get_current_charlen() >= 2
            ):
                # Machine is back at START and reports a char length of at
                # least 2 -- presumably a completed multi-byte character
                # (NOTE(review): confirm against CodingStateMachine).
                self._num_mb_chars += 1

        # Still undecided: promote to FOUND_IT once the confidence exceeds
        # the shortcut threshold.
        if (
            self.state == ProbingState.DETECTING
            and self.get_confidence() > self.SHORTCUT_THRESHOLD
        ):
            self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self) -> float:
        """Return the confidence derived from the multi-byte character count.

        With six or more counted characters the result is the constant
        ``0.99``; with fewer it is ``1.0 - 0.99 * ONE_CHAR_PROB ** count``.
        """
        base = 0.99
        if self._num_mb_chars >= 6:
            return base
        base *= self.ONE_CHAR_PROB**self._num_mb_chars
        return 1.0 - base
pip._vendor.chardet.charsetprober.CharSetProber
Definition
charsetprober.py:40
pip._vendor.chardet.charsetprober.CharSetProber.reset
None reset(self)
Definition
charsetprober.py:50
pip._vendor.chardet.charsetprober.CharSetProber.state
ProbingState state(self)
Definition
charsetprober.py:65
pip._vendor.chardet.charsetprober.CharSetProber.SHORTCUT_THRESHOLD
float SHORTCUT_THRESHOLD
Definition
charsetprober.py:42
pip._vendor.chardet.charsetprober.CharSetProber.get_confidence
float get_confidence(self)
Definition
charsetprober.py:68
pip._vendor.chardet.charsetprober.CharSetProber._state
_state
Definition
charsetprober.py:45
pip._vendor.chardet.codingstatemachine.CodingStateMachine
Definition
codingstatemachine.py:34
pip._vendor.chardet.utf8prober.UTF8Prober
Definition
utf8prober.py:36
pip._vendor.chardet.utf8prober.UTF8Prober.ONE_CHAR_PROB
float ONE_CHAR_PROB
Definition
utf8prober.py:37
pip._vendor.chardet.utf8prober.UTF8Prober.charset_name
str charset_name(self)
Definition
utf8prober.py:51
pip._vendor.chardet.utf8prober.UTF8Prober.reset
None reset(self)
Definition
utf8prober.py:45
pip._vendor.chardet.utf8prober.UTF8Prober.coding_sm
coding_sm
Definition
utf8prober.py:41
pip._vendor.chardet.utf8prober.UTF8Prober.__init__
None __init__(self)
Definition
utf8prober.py:39
pip._vendor.chardet.utf8prober.UTF8Prober.language
str language(self)
Definition
utf8prober.py:55
pip._vendor.chardet.utf8prober.UTF8Prober.state
state
Definition
utf8prober.py:71
pip._vendor.chardet.utf8prober.UTF8Prober.get_confidence
float get_confidence(self)
Definition
utf8prober.py:77
pip._vendor.chardet.utf8prober.UTF8Prober._num_mb_chars
_num_mb_chars
Definition
utf8prober.py:42
pip._vendor.chardet.utf8prober.UTF8Prober.feed
ProbingState feed(self, Union[bytes, bytearray] byte_str)
Definition
utf8prober.py:58
pip._vendor.chardet.utf8prober.UTF8Prober._state
_state
Definition
utf8prober.py:62
i
for i
Definition
prime_search.m:10
venv
lib
python3.12
site-packages
pip
_vendor
chardet
utf8prober.py
Generated by
1.9.8