Browse Source

Rewrote NumberParser.parse_all_numbers()

Its control flow was confusing before; now it's much more
straightforward.  We build a string encoding the class of each word, find
each run of number words in it with a regular expression, then parse each
number and build up our return string and list.  It works just as well as
the previous method, is a bit shorter, and I feel that it's clearer as well.
Clara Hobbs 8 years ago
parent
commit
93f09d0d96
1 changed file with 22 additions and 25 deletions
  1. 22
    25
      numberparser.py

+ 22
- 25
numberparser.py View File

@@ -139,44 +139,41 @@ class NumberParser:
139 139
     def parse_all_numbers(self, text_line):
140 140
         nums = []
141 141
         t_numless = ''
142
-        current_num = ''
143 142
 
144 143
         # Split text_line by commas, whitespace, and hyphens
145
-        text_line = text_line.strip()
146
-        text_words = re.split(r'[,\s-]+', text_line)
144
+        text_words = re.split(r'[,\s-]+', text_line.strip())
145
+        # Get a string of word classes
146
+        tw_classes = ''
147 147
         for word in text_words:
148
-            # If we aren't starting a number, add the word to the result string
149
-            if word not in self.mandatory_number_words:
150
-                if current_num:
151
-                    if word in self.number_words:
152
-                        current_num += word + ' '
153
-                    else:
154
-                        try:
155
-                            nums.append(self.parse_number(current_num))
156
-                        except ValueError:
157
-                            nums.append(-1)
158
-                        current_num = ''
159
-                        t_numless += '%d' + ' '
160
-                if not current_num:
161
-                    t_numless += word + ' '
148
+            if word in self.mandatory_number_words:
149
+                tw_classes += 'm'
150
+            elif word in self.allowed:
151
+                tw_classes += 'a'
162 152
             else:
163
-                # We're parsing a number now
164
-                current_num += word + ' '
165
-        if current_num:
153
+                tw_classes += 'w'
154
+
155
+        # For each string of number words:
156
+        last_end = 0
157
+        for m in re.finditer('m[am]*m|m', tw_classes):
158
+            # Get the number words
159
+            num_words = ' '.join(text_words[m.start():m.end()])
160
+            # Parse the number and store the value
166 161
             try:
167
-                nums.append(self.parse_number(current_num))
162
+                nums.append(self.parse_number(num_words))
168 163
             except ValueError:
169 164
                 nums.append(-1)
170
-            current_num = ''
171
-            t_numless += '%d' + ' '
165
+            # Add words to t_numless
166
+            t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
167
+            last_end = m.end()
168
+        t_numless += ' '.join(text_words[last_end:])
172 169
 
173 170
         return (t_numless.strip(), nums)
174 171
 
175 172
 if __name__ == '__main__':
176 173
     np = NumberParser()
177 174
     # Get the words to translate
178
-    text_line = input('Enter a number: ')
175
+    text_line = input('Enter a string: ')
179 176
     # Parse it to an integer
180 177
     value = np.parse_all_numbers(text_line)
181 178
     # Print the result
182
-    print('I claim that you meant the decimal number', value)
179
+    print(value)

Loading…
Cancel
Save