symbols_distances.gnuplot: use more informative titles
[SaveMySugar/python3-savemysugar.git] / src / savemysugar / MorseTranslator.py
1 #!/usr/bin/env python3
2 #
3 # MorseTranslator - translate to and from Morse code
4 #
5 # Copyright (C) 2015  Antonio Ospite <ao2@ao2.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20 import re
21
22
class MorseTranslator(object):
    """International Morse Code translator.

    The specification of the International Morse Code is in ITU-R M.1677-1
    (10/2009), Annex 1.

    The terminology used here may differ from the one used in some other
    places, so here is some nomenclature:

        symbol: one of . (dot), - (dash), ' ' (signal separator),
            '/' (word separator)

        character: a letter of the alphabet, a number, a punctuation mark, or
            a ' ' (text word separator)

        signal: a sequence of . and - symbols which encode a character,
            or a '/' (Morse word separator)

        word: a sequence of characters not containing a ' ', or
            a sequence of signals not containing a '/'

        text: a sequence of characters

        morse: a sequence of signals separated by ' '

    NOTE:
    signals are separated by a ' ' (signal separator), characters are not
    separated one from the other.

    This class defines a subset of the signals in Section 1 of the
    aforementioned specification, plus a word space, and it does not make
    assumptions about their actual transmission.
    """

    def __init__(self):
        """Build the character -> signal table and its inverse.

        Two dictionaries are made available on the instance:

            signals_table: maps a (lowercase) character to its signal
            characters_table: maps a signal back to its character
        """
        # XXX the current code only handles single characters,
        # so prosigns are not added to the tables below
        self.signals_table = {
            # Letters
            'a': ".-",
            'b': "-...",
            'c': "-.-.",
            'd': "-..",
            'e': ".",
            'f': "..-.",
            'g': "--.",
            'h': "....",
            'i': "..",
            'j': ".---",
            'k': "-.-",
            'l': ".-..",
            'm': "--",
            'n': "-.",
            'o': "---",
            'p': ".--.",
            'q': "--.-",
            'r': ".-.",
            's': "...",
            't': "-",
            'u': "..-",
            'v': "...-",
            'w': ".--",
            # 'x' also stands in for the multiplication sign, which uses
            # the very same signal, so no separate entry is needed for it.
            'x': "-..-",
            'y': "-.--",
            'z': "--..",
            # Figures
            '1': ".----",
            '2': "..---",
            '3': "...--",
            '4': "....-",
            '5': ".....",
            '6': "-....",
            '7': "--...",
            '8': "---..",
            '9': "----.",
            '0': "-----",
            # Punctuation marks and miscellaneous signs
            '.': ".-.-.-",
            ',': "--..--",
            ':': "---...",
            '?': "..--..",
            '\'': ".----.",
            '-': "-....-",
            '/': "-..-.",
            '(': "-.--.",
            ')': "-.--.-",
            '"': ".-..-.",
            '=': "-...-",
            '+': ".-.-.",
            '@': ".--.-.",
            # Represent the word space as a signal with only one "/" symbol
            ' ': "/",
        }

        # Reverse lookup used when decoding: signal -> character
        self.characters_table = {
            signal: character
            for character, signal in self.signals_table.items()
        }

    def stats(self):
        """Return the average entry lengths of the two translation tables.

        Returns:
            A tuple (average_signal_length, average_char_length) where the
            first item is the mean number of symbols of the signals in
            signals_table and the second one is the mean length of the
            characters in characters_table.
        """
        signal_length_sum = sum(
            len(signal) for signal in self.signals_table.values())
        average_signal_length = signal_length_sum / len(self.signals_table)

        character_length_sum = sum(
            len(character) for character in self.characters_table.values())
        average_char_length = character_length_sum / len(self.characters_table)

        return average_signal_length, average_char_length

    def sanitize_text(self, text):
        """Reduce a text to the characters which can be translated.

        The text is lowercased, every run of whitespace becomes a single
        ' ' (word separator) and all the characters which have no entry in
        signals_table are dropped.

        Leading and trailing word separators are not stripped.

        Args:
            text: the string to sanitize.

        Returns:
            The sanitized string.
        """
        sanitized = text.lower()

        # Normalize whitespace *before* dropping unsupported characters,
        # otherwise tabs and newlines would be deleted and the words around
        # them would be merged together.
        sanitized = re.sub(r"\s+", " ", sanitized)

        # The hyphen is escaped on purpose: an unescaped '-/ sequence is
        # interpreted as a range which also lets '*' through, a character
        # with no corresponding signal.
        sanitized = re.sub(r"[^a-z0-9.,:?'\-/()\"=+@ ]", "", sanitized)

        # Dropping characters may leave several word separators in a row.
        sanitized = re.sub(r" +", " ", sanitized)
        return sanitized

    def char_to_signal(self, character):
        """Return the signal for a character, case-insensitively.

        Args:
            character: the character to translate.

        Returns:
            The corresponding signal, or an empty string when the character
            is not in signals_table.
        """
        return self.signals_table.get(character.lower(), "")

    def text_to_morse(self, text, sanitize=True):
        """Translate a text to morse.

        Args:
            text: the string to translate.
            sanitize: when True (the default) the text is passed through
                sanitize_text() before the translation.

        Returns:
            The signals of all the characters, separated by ' '.
        """
        if sanitize:
            text = self.sanitize_text(text)

        signals = [self.char_to_signal(character) for character in text]
        return " ".join(signals)

    def sanitize_morse(self, morse):
        """Reduce a morse string to well separated signals.

        Underscores are accepted as an alternative spelling of dashes, any
        symbol other than '-', '.' and '/' is treated as a signal separator,
        and runs of separators are collapsed so that signals are separated
        by a single ' ' and words by a single ' / '.

        Args:
            morse: the string to sanitize.

        Returns:
            The sanitized string.
        """
        sanitized = re.sub("_", "-", morse)
        sanitized = re.sub(r"[^\-\.\/]", " ", sanitized)
        sanitized = re.sub(r"\s+", " ", sanitized)
        # Consecutive word separators count as a single one
        sanitized = re.sub(r"( ?/ ?)+", " / ", sanitized)
        return sanitized

    def signal_to_character(self, signal):
        """Return the character encoded by a signal.

        Args:
            signal: the signal to translate.

        Returns:
            The corresponding character, or '*' when the signal is not in
            characters_table.
        """
        return self.characters_table.get(signal, '*')

    def morse_to_text(self, morse, sanitize=True):
        """Translate morse to text.

        Args:
            morse: the string of signals to translate.
            sanitize: when True (the default) the string is passed through
                sanitize_morse() before the translation.

        Returns:
            The decoded text; unknown signals are rendered as '*'.
        """
        if sanitize:
            morse = self.sanitize_morse(morse)

        # split() with no arguments also discards leading, trailing and
        # repeated separators
        signals = morse.split()
        characters = [self.signal_to_character(signal) for signal in signals]
        return "".join(characters)
179
180
def test():
    """Print a demonstration of the translator on clean and dirty input.

    The table statistics are shown first, followed by a text -> morse ->
    text round trip, the effect of the sanitizing functions, and finally
    the conversions applied directly to unsanitized strings.
    """
    codec = MorseTranslator()

    signal_average, character_average = codec.stats()
    print("Average signal length:", signal_average)
    print("Average character length:", character_average)

    # Round trip of a plain sentence
    sample = "Hello, I am just some text."
    print(sample)

    encoded = codec.text_to_morse(sample)
    print(encoded)

    decoded = codec.morse_to_text(encoded)
    print(decoded)

    print("\n\nTesting sanitizing functions")

    # Text containing unsupported characters and redundant spaces
    print()
    dirty_text = '   <      >Hello::##        this is dirty^^%%  text!     '
    clean_text = codec.sanitize_text(dirty_text)
    print(dirty_text)
    print(clean_text)
    print(codec.text_to_morse(clean_text))

    # Morse containing stray symbols, underscores and repeated separators
    print()
    dirty_morse = (
        ' 009 .... . ._.. .-.. --- /34// / // // - .... .. ...'
        '    /    .. ... / -.. .. .-. - -.-- / - . -..- _   '
    )
    clean_morse = codec.sanitize_morse(dirty_morse)
    print(dirty_morse)
    print(clean_morse)
    print(codec.morse_to_text(clean_morse))

    print("\n\nTesting conversion on unsanitized strings")
    print(dirty_text)
    morse_from_dirty_text = codec.text_to_morse(dirty_text)
    print(morse_from_dirty_text)
    print(codec.morse_to_text(morse_from_dirty_text))

    print(dirty_morse)
    print(codec.morse_to_text(dirty_morse))
219
220
# Run the demonstration only when the module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    test()