Somewhat fancy voice command recognition software
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

numberparser.py 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #!/usr/bin/env python
  2. # numberparser.py - Translate words to decimal
  3. # This is part of Kaylee
  4. # -- this code is licensed GPLv3
  5. # Copyright 2015-2016 Clayton G. Hobbs
  6. # Portions Copyright 2013 Jezra
  7. import re
  8. # Define the mappings from words to numbers
  9. class NumberParser:
  10. zero = {
  11. 'zero': 0
  12. }
  13. ones = {
  14. 'one': 1,
  15. 'two': 2,
  16. 'three': 3,
  17. 'four': 4,
  18. 'five': 5,
  19. 'six': 6,
  20. 'seven': 7,
  21. 'eight': 8,
  22. 'nine': 9
  23. }
  24. special_ones = {
  25. 'ten': 10,
  26. 'eleven': 11,
  27. 'twelve': 12,
  28. 'thirteen': 13,
  29. 'fourteen': 14,
  30. 'fifteen': 15,
  31. 'sixteen': 16,
  32. 'seventeen': 17,
  33. 'eighteen': 18,
  34. 'ninteen': 19
  35. }
  36. tens = {
  37. 'twenty': 20,
  38. 'thirty': 30,
  39. 'fourty': 40,
  40. 'fifty': 50,
  41. 'sixty': 60,
  42. 'seventy': 70,
  43. 'eighty': 80,
  44. 'ninty': 90
  45. }
  46. hundred = {
  47. 'hundred': 100
  48. }
  49. exp = {
  50. 'thousand': 1000,
  51. 'million': 1000000,
  52. 'billion': 1000000000
  53. }
  54. allowed = [
  55. 'and'
  56. ]
  57. def __init__(self):
  58. self.number_words = []
  59. for word in sorted(self.zero.keys()):
  60. self.number_words.append(word)
  61. for word in sorted(self.ones.keys()):
  62. self.number_words.append(word)
  63. for word in sorted(self.special_ones.keys()):
  64. self.number_words.append(word)
  65. for word in sorted(self.tens.keys()):
  66. self.number_words.append(word)
  67. for word in sorted(self.hundred.keys()):
  68. self.number_words.append(word)
  69. for word in sorted(self.exp.keys()):
  70. self.number_words.append(word)
  71. self.mandatory_number_words = self.number_words.copy()
  72. for word in sorted(self.allowed):
  73. self.number_words.append(word)
  74. def parse_number(self, text_line):
  75. """
  76. Parse numbers from natural language into ints
  77. TODO: Throw more exceptions when invalid numbers are detected. Only
  78. allow certian valueless words within numbers. Support zero.
  79. """
  80. value = 0
  81. partial_value = 0
  82. last_list = None
  83. # Split text_line by commas, whitespace, and hyphens
  84. text_line = text_line.strip()
  85. text_words = re.split(r'[,\s-]+', text_line)
  86. # Parse the number
  87. for word in text_words:
  88. if word in self.zero:
  89. if last_list is not None:
  90. raise ValueError('Invalid number')
  91. value = 0
  92. last_list = self.zero
  93. elif word in self.ones:
  94. if last_list in (self.zero, self.ones, self.special_ones):
  95. raise ValueError('Invalid number')
  96. value += self.ones[word]
  97. last_list = self.ones
  98. elif word in self.special_ones:
  99. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  100. raise ValueError('Invalid number')
  101. value += self.special_ones[word]
  102. last_list = self.special_ones
  103. elif word in self.tens:
  104. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  105. raise ValueError('Invalid number')
  106. value += self.tens[word]
  107. last_list = self.tens
  108. elif word in self.hundred:
  109. if last_list not in (self.ones, self.special_ones, self.tens):
  110. raise ValueError('Invalid number')
  111. value *= self.hundred[word]
  112. last_list = self.hundred
  113. elif word in self.exp:
  114. if last_list in (self.zero, self.exp):
  115. raise ValueError('Invalid number')
  116. partial_value += value * self.exp[word]
  117. value = 0
  118. last_list = self.exp
  119. elif word not in self.allowed:
  120. raise ValueError('Invalid number')
  121. # Debugging information
  122. #print(word, value, partial_value)
  123. # Finish parsing the number
  124. value += partial_value
  125. return value
  126. def parse_all_numbers(self, text_line):
  127. nums = []
  128. t_numless = ''
  129. # Split text_line by commas, whitespace, and hyphens
  130. text_words = re.split(r'[,\s-]+', text_line.strip())
  131. # Get a string of word classes
  132. tw_classes = ''
  133. for word in text_words:
  134. if word in self.mandatory_number_words:
  135. tw_classes += 'm'
  136. elif word in self.allowed:
  137. tw_classes += 'a'
  138. else:
  139. tw_classes += 'w'
  140. # For each string of number words:
  141. last_end = 0
  142. for m in re.finditer('m[am]*m|m', tw_classes):
  143. # Get the number words
  144. num_words = ' '.join(text_words[m.start():m.end()])
  145. # Parse the number and store the value
  146. try:
  147. nums.append(self.parse_number(num_words))
  148. except ValueError:
  149. nums.append(-1)
  150. # Add words to t_numless
  151. t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
  152. last_end = m.end()
  153. t_numless += ' '.join(text_words[last_end:])
  154. return (t_numless.strip(), nums)
  155. if __name__ == '__main__':
  156. np = NumberParser()
  157. # Get the words to translate
  158. text_line = input('Enter a string: ')
  159. # Parse it to an integer
  160. value = np.parse_all_numbers(text_line)
  161. # Print the result
  162. print(value)