81848290ab4c06ca544aba323b2b75e322cdfba6
[SaveMySugar/python3-savemysugar.git] / src / savemysugar / MorseTranslator.py
1 #!/usr/bin/env python3
2 #
3 # MorseTranslator - translate to and from Morse code
4 #
5 # Copyright (C) 2015  Antonio Ospite <ao2@ao2.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20 import re
21
22
class MorseTranslator(object):
    """International Morse Code translator.

    The specification of the International Morse Code is in ITU-R M.1677-1
    (10/2009), Annex 1.

    The terminology used here may differ from the one used in some other
    places, so here is some nomenclature:

        symbol: one of . (dot), - (dash), ' ' (signal separator),
            '/' (word separator)

        character: a letter of the alphabet, a number, a punctuation mark, or
            a ' ' (text word separator)

        signal: a sequence of . and - symbols which encode a character,
            or a '/' (Morse word separator)

        word: a sequence of characters not containing a ' ', or
            a sequence of signals not containing a '/'

        text: a sequence of characters

        morse: a sequence of signals separated by ' '

    NOTE:
    signals are separated by a ' ' (signal separator), characters are not
    separated one from the other.

    This class defines a subset of the signals in Section 1 of the
    aforementioned specification, plus a word space, and it does not make
    assumptions about their actual transmission.

    Attributes:
        signals_table: dict mapping each supported (lowercase) character to
            its signal.
        characters_table: the inverse dict, mapping each signal back to its
            character.
    """

    def __init__(self):
        """Build the character -> signal table and its inverse."""
        # XXX the current code only handles single characters,
        # so prosigns are not added to the tables below
        self.signals_table = {
            # Letters
            'a': ".-",
            'b': "-...",
            'c': "-.-.",
            'd': "-..",
            'e': ".",
            'f': "..-.",
            'g': "--.",
            'h': "....",
            'i': "..",
            'j': ".---",
            'k': "-.-",
            'l': ".-..",
            'm': "--",
            'n': "-.",
            'o': "---",
            'p': ".--.",
            'q': "--.-",
            'r': ".-.",
            's': "...",
            't': "-",
            'u': "..-",
            'v': "...-",
            'w': ".--",
            'x': "-..-",
            'y': "-.--",
            'z': "--..",
            # Figures
            '1': ".----",
            '2': "..---",
            '3': "...--",
            '4': "....-",
            '5': ".....",
            '6': "-....",
            '7': "--...",
            '8': "---..",
            '9': "----.",
            '0': "-----",
            # Punctuation marks and miscellaneous signs
            '.': ".-.-.-",
            ',': "--..--",
            ':': "---...",
            '?': "..--..",
            "'": ".----.",
            '-': "-....-",
            '/': "-..-.",
            '(': "-.--.",
            ')': "-.--.-",
            '"': ".-..-.",
            '=': "-...-",
            '+': ".-.-.",
            # The multiplication sign is written as 'x' and shares the
            # signal of the letter, so it needs no entry of its own.
            '@': ".--.-.",
            # Represent the word space as a signal with only one "/" symbol
            ' ': "/",
        }

        # Inverse lookup used when decoding; every signal above is unique,
        # so no entry is lost when the mapping is reversed.
        self.characters_table = {
            signal: character
            for character, signal in self.signals_table.items()
        }

    def stats(self):
        """Return average lengths over the two lookup tables.

        Returns:
            A tuple (average_signal_length, average_char_length): the mean
            number of symbols per signal in signals_table and the mean
            length of the characters in characters_table.
        """
        signal_length_sum = sum(
            len(signal) for signal in self.signals_table.values())
        average_signal_length = signal_length_sum / len(self.signals_table)

        character_length_sum = sum(
            len(character) for character in self.characters_table.values())
        average_char_length = character_length_sum / len(self.characters_table)

        return average_signal_length, average_char_length

    def sanitize_text(self, text):
        """Return text reduced to the characters that can be encoded.

        The text is lowercased, every run of whitespace becomes a single
        ' ', characters missing from signals_table are dropped, and leading
        and trailing spaces are removed.

        Args:
            text: the string to clean up.

        Returns:
            The sanitized string; it may be empty.
        """
        # Normalize whitespace first, so that tabs and newlines still act
        # as word separators instead of being dropped as unknown characters.
        sanitized = re.sub(r"\s+", " ", text.lower())

        # Use the table itself as the whitelist, so that every character
        # which can be encoded (':' included) survives sanitization.
        sanitized = "".join(
            char for char in sanitized if char in self.signals_table)

        # Dropping characters may have left adjacent spaces behind.
        sanitized = re.sub(r" +", " ", sanitized)
        return sanitized.strip(" ")

    def char_to_signal(self, character):
        """Return the signal for character, or "" if it is not supported.

        The lookup is case-insensitive.
        """
        return self.signals_table.get(character.lower(), "")

    def text_to_morse(self, text, sanitize=True):
        """Translate text to morse.

        Args:
            text: the string to translate.
            sanitize: when true (the default) the text is passed through
                sanitize_text() before the translation.

        Returns:
            The signals of all the characters, separated by ' '; words are
            separated by the '/' signal.
        """
        if sanitize:
            text = self.sanitize_text(text)

        return " ".join(self.char_to_signal(char) for char in text)

    def sanitize_morse(self, morse):
        """Return morse normalized to signals separated by single spaces.

        '_' is accepted as an alternative dash and '|' as an alternative
        word separator; any other character which is not a Morse symbol is
        treated as a signal separator. Repeated word separators are merged
        into a single ' / ', and separators at the beginning and at the end
        are removed.

        Args:
            morse: the string to clean up.

        Returns:
            The sanitized string; it may be empty.
        """
        # Map the alternative symbols BEFORE discarding unknown characters,
        # otherwise '|' would be turned into a plain signal separator.
        sanitized = morse.replace("_", "-").replace("|", "/")
        sanitized = re.sub(r"[^\-\.\/]", " ", sanitized)
        sanitized = re.sub(r"\s+", " ", sanitized)
        sanitized = re.sub(r"( ?/ ?)+", " / ", sanitized)
        return sanitized.strip(" /")

    def signal_to_character(self, signal):
        """Return the character for signal, or '*' if it is unknown."""
        return self.characters_table.get(signal, '*')

    def morse_to_text(self, morse, sanitize=True):
        """Translate morse to text.

        Args:
            morse: a string of signals separated by whitespace.
            sanitize: when true (the default) the input is passed through
                sanitize_morse() before the translation.

        Returns:
            The decoded lowercase text; signals which are not in
            characters_table are rendered as '*'.
        """
        if sanitize:
            morse = self.sanitize_morse(morse)

        return "".join(
            self.signal_to_character(signal) for signal in morse.split())
184
185
def test():
    """Print a demonstration of MorseTranslator on clean and dirty input.

    The table statistics are shown first, then a round trip of a plain
    sentence, then the effect of the sanitizing functions, and finally the
    conversions applied directly to unsanitized strings.
    """
    morse_translator = MorseTranslator()

    signal_average, character_average = morse_translator.stats()
    print("Average signal length:", signal_average)
    print("Average character length:", character_average)

    # Round trip of a well-formed sentence.
    sample = "Hello, I am just some text."
    print(sample)

    encoded = morse_translator.text_to_morse(sample)
    print(encoded)

    decoded = morse_translator.morse_to_text(encoded)
    print(decoded)

    print("\n\nTesting sanitizing functions")

    print()
    dirty_text = '   <      >Hello::##        this is dirty^^%%  text!     '
    clean_text = morse_translator.sanitize_text(dirty_text)
    print(dirty_text)
    print(clean_text)
    print(morse_translator.text_to_morse(clean_text))

    print()
    dirty_morse = (
        ' 009 .... . ._.. .-.. --- /34// / // // - .... .. ...'
        '    /    .. ... / -.. .. .-. - -.-- / - . -..- _   '
    )
    clean_morse = morse_translator.sanitize_morse(dirty_morse)
    print(dirty_morse)
    print(clean_morse)
    print(morse_translator.morse_to_text(clean_morse))

    print("\n\nTesting conversion on unsanitized strings")
    print(dirty_text)
    morse_from_dirty_text = morse_translator.text_to_morse(dirty_text)
    print(morse_from_dirty_text)
    print(morse_translator.morse_to_text(morse_from_dirty_text))

    print(dirty_morse)
    print(morse_translator.morse_to_text(dirty_morse))
224
225
# Run the demonstration only when the module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    test()