|
@@ -139,44 +139,41 @@ class NumberParser:
|
139
|
139
|
def parse_all_numbers(self, text_line):
|
140
|
140
|
nums = []
|
141
|
141
|
t_numless = ''
|
142
|
|
- current_num = ''
|
143
|
142
|
|
144
|
143
|
# Split text_line by commas, whitespace, and hyphens
|
145
|
|
- text_line = text_line.strip()
|
146
|
|
- text_words = re.split(r'[,\s-]+', text_line)
|
|
144
|
+ text_words = re.split(r'[,\s-]+', text_line.strip())
|
|
145
|
+ # Get a string of word classes
|
|
146
|
+ tw_classes = ''
|
147
|
147
|
for word in text_words:
|
148
|
|
- # If we aren't starting a number, add the word to the result string
|
149
|
|
- if word not in self.mandatory_number_words:
|
150
|
|
- if current_num:
|
151
|
|
- if word in self.number_words:
|
152
|
|
- current_num += word + ' '
|
153
|
|
- else:
|
154
|
|
- try:
|
155
|
|
- nums.append(self.parse_number(current_num))
|
156
|
|
- except ValueError:
|
157
|
|
- nums.append(-1)
|
158
|
|
- current_num = ''
|
159
|
|
- t_numless += '%d' + ' '
|
160
|
|
- if not current_num:
|
161
|
|
- t_numless += word + ' '
|
|
148
|
+ if word in self.mandatory_number_words:
|
|
149
|
+ tw_classes += 'm'
|
|
150
|
+ elif word in self.allowed:
|
|
151
|
+ tw_classes += 'a'
|
162
|
152
|
else:
|
163
|
|
- # We're parsing a number now
|
164
|
|
- current_num += word + ' '
|
165
|
|
- if current_num:
|
|
153
|
+ tw_classes += 'w'
|
|
154
|
+
|
|
155
|
+ # For each string of number words:
|
|
156
|
+ last_end = 0
|
|
157
|
+ for m in re.finditer('m[am]*m|m', tw_classes):
|
|
158
|
+ # Get the number words
|
|
159
|
+ num_words = ' '.join(text_words[m.start():m.end()])
|
|
160
|
+ # Parse the number and store the value
|
166
|
161
|
try:
|
167
|
|
- nums.append(self.parse_number(current_num))
|
|
162
|
+ nums.append(self.parse_number(num_words))
|
168
|
163
|
except ValueError:
|
169
|
164
|
nums.append(-1)
|
170
|
|
- current_num = ''
|
171
|
|
- t_numless += '%d' + ' '
|
|
165
|
+ # Add words to t_numless
|
|
166
|
+ t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
|
|
167
|
+ last_end = m.end()
|
|
168
|
+ t_numless += ' '.join(text_words[last_end:])
|
172
|
169
|
|
173
|
170
|
return (t_numless.strip(), nums)
|
174
|
171
|
|
175
|
172
|
if __name__ == '__main__':
|
176
|
173
|
np = NumberParser()
|
177
|
174
|
# Get the words to translate
|
178
|
|
- text_line = input('Enter a number: ')
|
|
175
|
+ text_line = input('Enter a string: ')
|
179
|
176
|
# Parse it to an integer
|
180
|
177
|
value = np.parse_all_numbers(text_line)
|
181
|
178
|
# Print the result
|
182
|
|
- print('I claim that you meant the decimal number', value)
|
|
179
|
+ print(value)
|