Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
mbcharsetprober.py
Go to the documentation of this file.
29
30from typing import Optional, Union
31
32from .chardistribution import CharDistributionAnalysis
33from .charsetprober import CharSetProber
34from .codingstatemachine import CodingStateMachine
35from .enums import LanguageFilter, MachineState, ProbingState
36
37
39 """
40 MultiByteCharSetProber
41 """
42
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
    """Create a multi-byte prober with no state machine or analyzer attached.

    Args:
        lang_filter: Passed through unchanged to the parent initializer.
    """
    super().__init__(lang_filter=lang_filter)
    # Both collaborators start out unset: reset() skips them while they are
    # falsy, and feed() asserts that they have been assigned.
    self.distribution_analyzer: Optional[CharDistributionAnalysis] = None
    self.coding_sm: Optional[CodingStateMachine] = None
    # Two-byte carry buffer: feed() stores the last byte of a chunk in
    # slot 0 and the first byte of the following chunk in slot 1.
    self._last_char = bytearray(b"\0\0")
48
def reset(self) -> None:
    """Return the prober to its freshly constructed state.

    Runs the parent ``reset()``, resets the coding state machine and the
    distribution analyzer when they are set, and zeroes the two-byte
    carry buffer.
    """
    super().reset()
    # Either collaborator may still be None (see __init__), so guard both.
    if self.coding_sm:
        self.coding_sm.reset()
    if self.distribution_analyzer:
        self.distribution_analyzer.reset()
    # Allocate a fresh buffer rather than mutating the old one in place.
    self._last_char = bytearray(b"\0\0")
56
# feed: push a chunk of bytes through the coding state machine one byte at a
# time, forwarding character data to the distribution analyzer.
#
# Parameters: byte_str -- the chunk to examine (bytes or bytearray).
# Returns: annotated as ProbingState; see the note on the return line.
#
# NOTE(review): this listing is incomplete. Listing lines 66, 67, 70, 73, 85,
# 87 and 89 are absent from the text, so the statements they held are not
# visible here. Restore them from the real module before relying on this code.
57 def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
# Both collaborators are Optional; the asserts require them to be set.
58 assert self.coding_sm is not None
59 assert self.distribution_analyzer is not None
60
61 for i, byte in enumerate(byte_str):
62 coding_state = self.coding_sm.next_state(byte)
# ERROR: log a debug message and stop scanning this chunk.
63 if coding_state == MachineState.ERROR:
64 self.logger.debug(
65 "%s %s prober hit error at byte %s",
# NOTE(review): the format string has three %s placeholders but only `i` is
# visible; listing lines 66-67 (presumably the first two arguments) are missing.
68 i,
69 )
# NOTE(review): listing line 70 is missing just before this break.
71 break
72 if coding_state == MachineState.ITS_ME:
# NOTE(review): listing line 73 is missing just before this break.
74 break
# START: the current character length is read from the state machine and the
# character bytes are handed to the distribution analyzer.
75 if coding_state == MachineState.START:
76 char_len = self.coding_sm.get_current_charlen()
77 if i == 0:
# At index 0 there is no previous byte in this chunk, so the byte carried in
# _last_char[0] is paired with the current one via the carry buffer.
78 self._last_char[1] = byte
79 self.distribution_analyzer.feed(self._last_char, char_len)
80 else:
# Otherwise the two-byte slice ending at the current index is passed along.
81 self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len)
82
# Remember the final byte of this chunk for the i == 0 case of the next call.
# NOTE(review): byte_str[-1] raises IndexError when byte_str is empty --
# confirm callers never pass an empty chunk.
83 self._last_char[0] = byte_str[-1]
84
# NOTE(review): listing line 85 is missing; the second operand of `and`
# (line 87) and the body of this `if` (line 89) are missing as well.
86 if self.distribution_analyzer.got_enough_data() and (
88 ):
90
# NOTE(review): `statestate` looks like an extraction duplicate of `state` --
# verify against the real module.
91 return self.statestate
92
def get_confidence(self) -> float:
    """Return the confidence reported by the distribution analyzer.

    Returns:
        Whatever ``self.distribution_analyzer.get_confidence()`` returns.

    Raises:
        AssertionError: If no distribution analyzer has been assigned.
    """
    # The attribute is Optional; the assert requires it to be set here.
    assert self.distribution_analyzer is not None
    return self.distribution_analyzer.get_confidence()
None __init__(self, LanguageFilter lang_filter=LanguageFilter.NONE)
ProbingState feed(self, Union[bytes, bytearray] byte_str)
for i