Let us walk on the 3-isogeny graph
Loading...
Searching...
No Matches
chardetect.py
Go to the documentation of this file.
"""
Script which takes one or more file paths and reports on their detected
encodings

Example::

    % chardetect somefile someotherfile
    somefile: windows-1252 with confidence 0.5
    someotherfile: ascii with confidence 1.0

If no paths are provided, it takes its input from stdin.

"""
14
15
16import argparse
17import sys
18from typing import Iterable, List, Optional
19
20from .. import __version__
21from ..universaldetector import UniversalDetector
22
23
def description_of(
    lines: Iterable[bytes],
    name: str = "stdin",
    minimal: bool = False,
    should_rename_legacy: bool = False,
) -> Optional[str]:
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :param minimal: If true, return only the detector's ``"encoding"``
                    result instead of a full descriptive sentence.
    :type minimal: ``bool``
    :param should_rename_legacy: Should we rename legacy encodings to
                                 their more modern equivalents?
    :type should_rename_legacy: ``bool``
    :returns: With ``minimal`` set, the detected encoding as reported by the
              detector. Otherwise ``"<name>: <encoding> with confidence
              <confidence>"`` when an encoding was detected, or
              ``"<name>: no result"`` when it was not.
    """
    detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
    for line in lines:
        # The detector is fed a mutable byte buffer for each line.
        detector.feed(bytearray(line))
        # shortcut out of the loop to save reading further - particularly
        # useful if we read a BOM.
        if detector.done:
            break
    # Finalise detection before reading the result.
    detector.close()
    result = detector.result
    if minimal:
        return result["encoding"]
    if result["encoding"]:
        return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
    return f"{name}: no result"
56
57
def main(argv: Optional[List[str]] = None) -> None:
    """
    Handles command line arguments and gets things started.

    Parses the arguments, then prints one encoding report per input to
    standard output. Each input opened from a path is closed once it has
    been processed; the process's own stdin buffer is left open.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    # NOTE(review): the ``ArgumentParser(``/``add_argument(`` opener lines
    # were reconstructed from the surviving keyword arguments - confirm
    # against the upstream file.
    parser = argparse.ArgumentParser(
        description=(
            "Takes one or more file paths and reports their detected encodings"
        )
    )
    parser.add_argument(
        "input",
        help="File whose encoding we would like to determine. (default: stdin)",
        type=argparse.FileType("rb"),
        nargs="*",
        default=[sys.stdin.buffer],
    )
    parser.add_argument(
        "--minimal",
        help="Print only the encoding to standard output",
        action="store_true",
    )
    parser.add_argument(
        "-l",
        "--legacy",
        help="Rename legacy encodings to more modern ones.",
        action="store_true",
    )
    parser.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )
    args = parser.parse_args(argv)

    for f in args.input:
        try:
            if f.isatty():
                # Interactive terminal: tell the user how to end their input.
                print(
                    "You are running chardetect interactively. Press "
                    "CTRL-D twice at the start of a blank line to signal the "
                    "end of your input. If you want help, run chardetect "
                    "--help\n",
                    file=sys.stderr,
                )
            print(
                description_of(
                    f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy
                )
            )
        finally:
            # Release handles opened by argparse.FileType, but never close
            # the process's own stdin buffer.
            if f is not sys.stdin.buffer:
                f.close()
109
110
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
Optional[str] description_of(Iterable[bytes] lines, str name="stdin", bool minimal=False, bool should_rename_legacy=False)
Definition chardetect.py:29
for i