Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
sjisprober.py
Go to the documentation of this file.
27
28from typing import Union
29
30from .chardistribution import SJISDistributionAnalysis
31from .codingstatemachine import CodingStateMachine
32from .enums import MachineState, ProbingState
33from .jpcntx import SJISContextAnalysis
34from .mbcharsetprober import MultiByteCharSetProber
35from .mbcssm import SJIS_SM_MODEL
36
37
# Reset the prober by delegating to the parent class's reset().
# NOTE(review): the listing numbers jump from 47 to 49, so one line of this
# method's body appears to be absent from this extract -- confirm against the
# original file before relying on this text.
46 def reset(self) -> None:
47 super().reset()
49
@property
def charset_name(self) -> str:
    """Return the charset name reported by ``self.context_analyzer``."""
    return self.context_analyzer.charset_name
53
@property
def language(self) -> str:
    """Return the language this prober reports; always ``"Japanese"``."""
    return "Japanese"
57
# Run each byte of byte_str through self.coding_sm and return the prober state.
# NOTE(review): this is a numbered source listing with indentation stripped.
# The listing numbers skip 67-68, 71, 74, 80, 83, 85, 92, 94 and 96, so several
# statements of this method are absent from this extract -- restore them from
# the original file before relying on this text.
58 def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
# Both collaborators must be set before any byte is processed.
59 assert self.coding_sm is not None
60 assert self.distribution_analyzer is not None
61
62 for i, byte in enumerate(byte_str):
63 coding_state = self.coding_sm.next_state(byte)
64 if coding_state == MachineState.ERROR:
65 self.logger.debug(
66 "%s %s prober hit error at byte %s",
# NOTE(review): the format string has three %s placeholders but only `i` is
# visible as an argument; listing lines 67-68 are missing here.
69 i,
70 )
# NOTE(review): listing line 71 is missing before this break.
72 break
73 if coding_state == MachineState.ITS_ME:
# NOTE(review): listing line 74 is missing before this break.
75 break
76 if coding_state == MachineState.START:
77 char_len = self.coding_sm.get_current_charlen()
78 if i == 0:
# NOTE(review): `_last_char_last_char` looks like a doubled `_last_char` token
# introduced by the page extraction -- TODO confirm the real attribute name.
79 self._last_char_last_char[1] = byte
# NOTE(review): listing line 80 is missing; the next two lines are the argument
# list and closing parenthesis of a call whose opening is not visible.
81 self._last_char_last_char[2 - char_len :], char_len
82 )
# NOTE(review): listing line 83 is missing here.
84 else:
# NOTE(review): listing line 85 is missing; the next two lines are the argument
# list and closing parenthesis of a call whose opening is not visible.
86 byte_str[i + 1 - char_len : i + 3 - char_len], char_len
87 )
# The slice below covers indices i-1 and i of byte_str.
88 self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len)
89
# Stores the final byte of this chunk in slot 0 of the buffer; presumably so
# the i == 0 branch of a later call can pair it with that call's first byte --
# verify against the original file.
90 self._last_char_last_char[0] = byte_str[-1]
91
# NOTE(review): listing lines 92, 94 and 96 are missing around this condition,
# so its enclosing test, second operand and body are not visible.
93 if self.context_analyzer.got_enough_data() and (
95 ):
97
# NOTE(review): `statestatestate` looks like a tripled `state` token introduced
# by the page extraction -- TODO confirm.
98 return self.statestatestate
99
def get_confidence(self) -> float:
    """Return the larger of the context and distribution confidences.

    Calls ``get_confidence()`` on ``self.context_analyzer`` and on
    ``self.distribution_analyzer`` and returns the maximum of the two.

    Raises:
        AssertionError: if ``self.distribution_analyzer`` is ``None``.
    """
    assert self.distribution_analyzer is not None

    context_conf = self.context_analyzer.get_confidence()
    distrib_conf = self.distribution_analyzer.get_confidence()
    return max(context_conf, distrib_conf)
ProbingState feed(self, Union[bytes, bytearray] byte_str)
Definition sjisprober.py:58
for i