Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
utf8prober.py
Go to the documentation of this file.
27
28from typing import Union
29
30from .charsetprober import CharSetProber
31from .codingstatemachine import CodingStateMachine
32from .enums import MachineState, ProbingState
33from .mbcssm import UTF8_SM_MODEL
34
35
# Base of the per-character factor used by get_confidence(): the "unlike"
# estimate is multiplied by ONE_CHAR_PROB ** (number of multi-byte chars).
ONE_CHAR_PROB = 0.5
38
def __init__(self) -> None:
    """Create the prober and put it into its initial state.

    Builds a ``CodingStateMachine`` from ``UTF8_SM_MODEL`` and then calls
    ``reset()``, which (among other things) zeroes ``_num_mb_chars``.
    """
    super().__init__()
    self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
    # NOTE(review): the scraped listing skips a line number right here, so
    # one statement may be missing from this extract -- confirm against the
    # upstream file.
    # The listing showed the call as "resetreset" (duplicated link text);
    # the method defined in this file is reset().
    self.reset()
44
def reset(self) -> None:
    """Return the prober to its initial state.

    Resets the base class, resets the coding state machine and clears the
    count of multi-byte characters seen so far.
    """
    super().reset()
    self.coding_sm.reset()
    self._num_mb_chars = 0
49
@property
def charset_name(self) -> str:
    """Name of the charset reported by this prober; always ``"utf-8"``."""
    return "utf-8"
53
@property
def language(self) -> str:
    """Language reported by this prober; always the empty string."""
    return ""
57
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
    """Run *byte_str* through the coding state machine.

    Each byte is passed to ``self.coding_sm.next_state``. Scanning stops at
    the first ``MachineState.ERROR`` or ``MachineState.ITS_ME``. Whenever
    the machine reports ``MachineState.START`` and its current character
    length is at least 2, ``_num_mb_chars`` is incremented.

    Args:
        byte_str: The bytes to examine.

    Returns:
        The value of ``self.state`` after the bytes have been processed.
    """
    for byte in byte_str:
        coding_state = self.coding_sm.next_state(byte)
        if coding_state == MachineState.ERROR:
            # NOTE(review): the scraped listing skips a line number inside
            # this branch; a statement (presumably a state update) may be
            # missing -- confirm against the upstream file.
            break
        if coding_state == MachineState.ITS_ME:
            # NOTE(review): same gap here -- confirm against upstream.
            break
        if coding_state == MachineState.START:
            if self.coding_sm.get_current_charlen() >= 2:
                self._num_mb_chars += 1

    # NOTE(review): the listing skips three line numbers between the loop
    # and the return; any logic that lived there is not reproduced in this
    # extract -- confirm against the upstream file.

    # The listing showed "self.statestate" (duplicated link text); the
    # attribute being returned is ``state``.
    return self.state
76
def get_confidence(self) -> float:
    """Return a confidence score derived from the multi-byte character count.

    With fewer than 6 multi-byte characters counted, the result is
    ``1.0 - 0.99 * ONE_CHAR_PROB ** _num_mb_chars``. From 6 characters on,
    the constant ``0.99`` is returned.
    """
    unlike = 0.99
    if self._num_mb_chars < 6:
        # Each multi-byte character scales the "unlike" estimate by
        # ONE_CHAR_PROB; confidence is its complement.
        unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
        return 1.0 - unlike
    return unlike
ProbingState feed(self, Union[bytes, bytearray] byte_str)
Definition utf8prober.py:58
for i