123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187 |
- #!/usr/bin/env python
- # This is part of Kaylee
- # -- this code is licensed GPLv3
- # Copyright 2015-2016 Clayton G. Hobbs
- # Portions Copyright 2013 Jezra
-
- import re
-
-
class NumberParser:
    """Parse integers written as English words.

    The vocabulary is split into word groups (zero, ones, teens, tens,
    "hundred", and the magnitude words thousand/million/billion).  Parsing
    walks the words left to right and uses the group of the previous number
    word to reject sequences that do not form a number.
    """

    zero = {
        'zero': 0,
    }

    ones = {
        'one': 1,
        'two': 2,
        'three': 3,
        'four': 4,
        'five': 5,
        'six': 6,
        'seven': 7,
        'eight': 8,
        'nine': 9,
    }

    special_ones = {
        'ten': 10,
        'eleven': 11,
        'twelve': 12,
        'thirteen': 13,
        'fourteen': 14,
        'fifteen': 15,
        'sixteen': 16,
        'seventeen': 17,
        'eighteen': 18,
        'nineteen': 19,
    }

    tens = {
        'twenty': 20,
        'thirty': 30,
        'forty': 40,
        'fifty': 50,
        'sixty': 60,
        'seventy': 70,
        'eighty': 80,
        'ninety': 90,
    }

    hundred = {
        'hundred': 100,
    }

    exp = {
        'thousand': 1000,
        'million': 1000000,
        'billion': 1000000000,
    }

    # Filler words that may appear inside a number but carry no value
    allowed = [
        'and',
    ]

    # Shared word lists, filled in by the first instance that is created.
    # number_words holds every word that may appear in a number;
    # mandatory_number_words holds the same words minus the fillers.
    number_words = None
    mandatory_number_words = None

    def __init__(self):
        """Build the class-wide word lists on first instantiation."""
        if NumberParser.number_words is None:
            groups = (self.zero, self.ones, self.special_ones, self.tens,
                      self.hundred, self.exp)
            mandatory = []
            for group in groups:
                # Each group is listed alphabetically, groups in fixed order
                mandatory.extend(sorted(group))
            NumberParser.mandatory_number_words = mandatory
            NumberParser.number_words = mandatory + sorted(self.allowed)

    def parse_number(self, text_line):
        """Parse a number from English into an int.

        text_line is split on commas, whitespace and hyphens; empty pieces
        produced by leading or trailing separators are ignored.  Words in
        ``allowed`` are skipped.

        Raises ValueError('Invalid number') if a word is not part of the
        vocabulary, if the words appear in an order that does not form a
        number, or if text_line contains no number word at all.
        """
        value = 0          # value of the group currently being read
        partial_value = 0  # sum of the groups already closed by an exp word
        last_list = None   # word group of the previous number word

        text_words = [w for w in re.split(r'[,\s-]+', text_line) if w]
        for word in text_words:
            if word in self.zero:
                # "zero" is only valid as the first number word
                if last_list is not None:
                    raise ValueError('Invalid number')
                value = 0
                last_list = self.zero
            elif word in self.ones:
                if last_list in (self.zero, self.ones, self.special_ones):
                    raise ValueError('Invalid number')
                value += self.ones[word]
                last_list = self.ones
            elif word in self.special_ones:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.special_ones[word]
                last_list = self.special_ones
            elif word in self.tens:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.tens[word]
                last_list = self.tens
            elif word in self.hundred:
                # "hundred" multiplies the quantity read just before it
                if last_list not in (self.ones, self.special_ones, self.tens):
                    raise ValueError('Invalid number')
                value *= self.hundred[word]
                last_list = self.hundred
            elif word in self.exp:
                # A magnitude word needs a quantity in front of it, so it
                # cannot open the number or follow "zero" or another
                # magnitude word
                if last_list in (None, self.zero, self.exp):
                    raise ValueError('Invalid number')
                partial_value += value * self.exp[word]
                value = 0
                last_list = self.exp
            elif word not in self.allowed:
                raise ValueError('Invalid number')

        # Only filler words (or nothing at all) is not a number
        if last_list is None:
            raise ValueError('Invalid number')
        return value + partial_value

    def parse_all_numbers(self, text_line):
        """
        Parse all numbers from English to ints

        Returns a tuple whose first element is text_line with all English
        numbers replaced with "%d", and whose second element is a list
        containing all the parsed numbers as ints.  A run of number words
        that does not form a valid number is reported as -1.  Words in the
        first element are joined by single spaces, so the original commas
        and hyphens are not preserved.
        """
        nums = []
        out_words = []

        # Split text_line by commas, whitespace, and hyphens
        text_words = [w for w in re.split(r'[,\s-]+', text_line) if w]

        # One class letter per word: m = number word, a = filler, w = other
        letters = []
        for word in text_words:
            if word in self.mandatory_number_words:
                letters.append('m')
            elif word in self.allowed:
                letters.append('a')
            else:
                letters.append('w')
        tw_classes = ''.join(letters)

        # A number is a run of number words and fillers that both starts
        # and ends with a number word
        last_end = 0
        for m in re.finditer(r'm[am]*m|m', tw_classes):
            num_words = ' '.join(text_words[m.start():m.end()])
            try:
                nums.append(self.parse_number(num_words))
            except ValueError:
                nums.append(-1)
            # Keep the words before the number, then the placeholder
            out_words.extend(text_words[last_end:m.start()])
            out_words.append('%d')
            last_end = m.end()
        out_words.extend(text_words[last_end:])

        return (' '.join(out_words), nums)
-
-
if __name__ == '__main__':
    # Interactive check: read one line, then print the text with its
    # numbers replaced by placeholders alongside the parsed values
    parser = NumberParser()
    entered = input('Enter a string: ')
    print(parser.parse_all_numbers(entered))
|