|
@@ -0,0 +1,182 @@
|
|
1
|
+#!/usr/bin/env python
|
|
2
|
+# numberparser.py - Translate words to decimal
|
|
3
|
+
|
|
4
|
+# This is part of Kaylee
|
|
5
|
+# -- this code is licensed GPLv3
|
|
6
|
+# Copyright 2015 Clayton G. Hobbs
|
|
7
|
+# Portions Copyright 2013 Jezra
|
|
8
|
+import re
|
|
9
|
+
|
|
10
|
+# Define the mappings from words to numbers
|
|
11
|
class NumberParser:
    """Translate English number words into integers.

    The class-level dictionaries map each recognised word to its value,
    grouped by the grammatical role the word plays inside a number.
    ``parse_number`` converts a single spelled-out number, while
    ``parse_all_numbers`` finds every number embedded in a line of text.
    """

    # The word for nothing; only valid as a complete number on its own.
    zero = {
        'zero': 0,
    }

    # Units, which may follow a tens word ("twenty one").
    ones = {
        'one': 1,
        'two': 2,
        'three': 3,
        'four': 4,
        'five': 5,
        'six': 6,
        'seven': 7,
        'eight': 8,
        'nine': 9,
    }

    # Ten through nineteen, which can follow neither units nor tens.
    special_ones = {
        'ten': 10,
        'eleven': 11,
        'twelve': 12,
        'thirteen': 13,
        'fourteen': 14,
        'fifteen': 15,
        'sixteen': 16,
        'seventeen': 17,
        'eighteen': 18,
        'nineteen': 19,
    }

    # Multiples of ten.
    tens = {
        'twenty': 20,
        'thirty': 30,
        'forty': 40,
        'fifty': 50,
        'sixty': 60,
        'seventy': 70,
        'eighty': 80,
        'ninety': 90,
    }

    # Multiplies the value accumulated so far in the current group.
    hundred = {
        'hundred': 100,
    }

    # Group multipliers; each one closes the current group of words.
    exp = {
        'thousand': 1000,
        'million': 1000000,
        'billion': 1000000000,
    }

    # Valueless filler words tolerated inside a number.
    allowed = [
        'and',
    ]

    def __init__(self):
        """Build the word lists used to spot numbers in running text.

        ``mandatory_number_words`` holds every word that carries a value
        (and can therefore start a number); ``number_words`` additionally
        contains the valueless filler words from ``allowed``.
        """
        groups = (self.zero, self.ones, self.special_ones, self.tens,
                  self.hundred, self.exp)
        self.mandatory_number_words = [
            word for group in groups for word in group
        ]
        self.number_words = self.mandatory_number_words + list(self.allowed)

    def parse_number(self, text_line):
        """Parse one spelled-out number into an int.

        Words may be separated by commas, whitespace and hyphens; empty
        tokens caused by leading or trailing separators are ignored.

        Args:
            text_line: The number in words, e.g. ``'one hundred and five'``.

        Returns:
            The integer value of the number.

        Raises:
            ValueError: If the text contains a word that is not part of a
                number, contains no value-carrying word at all, or
                combines words in an order this parser rejects (such as
                two units in a row, or a multiplier with nothing to
                multiply).
        """
        value = 0            # value of the group currently being read
        partial_value = 0    # sum of the groups closed by a multiplier
        last_list = None     # category of the previous value-carrying word

        # Split by commas, whitespace and hyphens.  re.split() yields empty
        # strings when the text starts or ends with a separator; drop them.
        text_words = [
            word for word in re.split(r'[,\s-]+', text_line.strip()) if word
        ]
        for word in text_words:
            if word in self.zero:
                # "zero" is only meaningful as the very first number word
                if last_list is not None:
                    raise ValueError('Invalid number')
                value = 0
                last_list = self.zero
            elif word in self.ones:
                if last_list in (self.zero, self.ones, self.special_ones):
                    raise ValueError('Invalid number')
                value += self.ones[word]
                last_list = self.ones
            elif word in self.special_ones:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.special_ones[word]
                last_list = self.special_ones
            elif word in self.tens:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.tens[word]
                last_list = self.tens
            elif word in self.hundred:
                # "hundred" needs a count directly in front of it
                if last_list not in (self.ones, self.special_ones,
                                     self.tens):
                    raise ValueError('Invalid number')
                value *= self.hundred[word]
                last_list = self.hundred
            elif word in self.exp:
                # A multiplier needs something to multiply: it can neither
                # open the number nor follow zero or another multiplier.
                if last_list is None or last_list in (self.zero, self.exp):
                    raise ValueError('Invalid number')
                partial_value += value * self.exp[word]
                value = 0
                last_list = self.exp
            elif word not in self.allowed:
                raise ValueError('Invalid number')

        # Empty input or nothing but filler words is not a number
        if last_list is None:
            raise ValueError('Invalid number')
        return value + partial_value

    def parse_all_numbers(self, text_line):
        """Replace every spelled-out number in a line of text.

        Args:
            text_line: Arbitrary text; words are separated by commas,
                whitespace and hyphens.

        Returns:
            A tuple ``(template, numbers)``.  ``template`` is the text with
            its words joined by single spaces and each number replaced by
            the literal ``'%d'``; ``numbers`` lists the parsed values in
            order of appearance.  A run of number words that does not form
            a valid number is reported as ``-1``.
        """
        nums = []
        output_words = []
        pending = []    # words of the number currently being collected

        def flush_pending():
            # Filler words at the end of a run ("two and a half") belong to
            # the surrounding text, so hand them back instead of dropping
            # them along with the number.
            trailing = []
            while pending and pending[-1] in self.allowed:
                trailing.insert(0, pending.pop())
            try:
                nums.append(self.parse_number(' '.join(pending)))
            except ValueError:
                nums.append(-1)
            output_words.append('%d')
            output_words.extend(trailing)
            del pending[:]

        for word in re.split(r'[,\s-]+', text_line.strip()):
            if not word:
                # Artifact of a leading or trailing separator
                continue
            if word in self.mandatory_number_words:
                pending.append(word)
            elif pending and word in self.number_words:
                # Filler word inside a number ("one hundred and five")
                pending.append(word)
            else:
                if pending:
                    flush_pending()
                output_words.append(word)
        if pending:
            flush_pending()

        return (' '.join(output_words), nums)
|
|
174
|
+
|
|
175
|
if __name__ == '__main__':
    # Interactive demo: read one line of text from the user, extract every
    # spelled-out number from it and show the (template, numbers) result.
    parser = NumberParser()
    spoken_text = input('Enter a number: ')
    print('I claim that you meant the decimal number',
          parser.parse_all_numbers(spoken_text))
|