Somewhat fancy voice command recognition software
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

numbers.py 5.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. #!/usr/bin/env python
  2. # This is part of Kaylee
  3. # -- this code is licensed GPLv3
  4. # Copyright 2015-2016 Clayton G. Hobbs
  5. # Portions Copyright 2013 Jezra
  6. import re
  7. # Define the mappings from words to numbers
  8. class NumberParser:
  9. zero = {
  10. 'zero': 0
  11. }
  12. ones = {
  13. 'one': 1,
  14. 'two': 2,
  15. 'three': 3,
  16. 'four': 4,
  17. 'five': 5,
  18. 'six': 6,
  19. 'seven': 7,
  20. 'eight': 8,
  21. 'nine': 9
  22. }
  23. special_ones = {
  24. 'ten': 10,
  25. 'eleven': 11,
  26. 'twelve': 12,
  27. 'thirteen': 13,
  28. 'fourteen': 14,
  29. 'fifteen': 15,
  30. 'sixteen': 16,
  31. 'seventeen': 17,
  32. 'eighteen': 18,
  33. 'ninteen': 19
  34. }
  35. tens = {
  36. 'twenty': 20,
  37. 'thirty': 30,
  38. 'fourty': 40,
  39. 'fifty': 50,
  40. 'sixty': 60,
  41. 'seventy': 70,
  42. 'eighty': 80,
  43. 'ninty': 90
  44. }
  45. hundred = {
  46. 'hundred': 100
  47. }
  48. exp = {
  49. 'thousand': 1000,
  50. 'million': 1000000,
  51. 'billion': 1000000000
  52. }
  53. allowed = [
  54. 'and'
  55. ]
  56. def __init__(self):
  57. self.number_words = []
  58. for word in sorted(self.zero.keys()):
  59. self.number_words.append(word)
  60. for word in sorted(self.ones.keys()):
  61. self.number_words.append(word)
  62. for word in sorted(self.special_ones.keys()):
  63. self.number_words.append(word)
  64. for word in sorted(self.tens.keys()):
  65. self.number_words.append(word)
  66. for word in sorted(self.hundred.keys()):
  67. self.number_words.append(word)
  68. for word in sorted(self.exp.keys()):
  69. self.number_words.append(word)
  70. self.mandatory_number_words = self.number_words.copy()
  71. for word in sorted(self.allowed):
  72. self.number_words.append(word)
  73. def parse_number(self, text_line):
  74. """
  75. Parse numbers from natural language into ints
  76. TODO: Throw more exceptions when invalid numbers are detected. Only
  77. allow certian valueless words within numbers. Support zero.
  78. """
  79. value = 0
  80. partial_value = 0
  81. last_list = None
  82. # Split text_line by commas, whitespace, and hyphens
  83. text_line = text_line.strip()
  84. text_words = re.split(r'[,\s-]+', text_line)
  85. # Parse the number
  86. for word in text_words:
  87. if word in self.zero:
  88. if last_list is not None:
  89. raise ValueError('Invalid number')
  90. value = 0
  91. last_list = self.zero
  92. elif word in self.ones:
  93. if last_list in (self.zero, self.ones, self.special_ones):
  94. raise ValueError('Invalid number')
  95. value += self.ones[word]
  96. last_list = self.ones
  97. elif word in self.special_ones:
  98. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  99. raise ValueError('Invalid number')
  100. value += self.special_ones[word]
  101. last_list = self.special_ones
  102. elif word in self.tens:
  103. if last_list in (self.zero, self.ones, self.special_ones, self.tens):
  104. raise ValueError('Invalid number')
  105. value += self.tens[word]
  106. last_list = self.tens
  107. elif word in self.hundred:
  108. if last_list not in (self.ones, self.special_ones, self.tens):
  109. raise ValueError('Invalid number')
  110. value *= self.hundred[word]
  111. last_list = self.hundred
  112. elif word in self.exp:
  113. if last_list in (self.zero, self.exp):
  114. raise ValueError('Invalid number')
  115. partial_value += value * self.exp[word]
  116. value = 0
  117. last_list = self.exp
  118. elif word not in self.allowed:
  119. raise ValueError('Invalid number')
  120. # Debugging information
  121. #print(word, value, partial_value)
  122. # Finish parsing the number
  123. value += partial_value
  124. return value
  125. def parse_all_numbers(self, text_line):
  126. nums = []
  127. t_numless = ''
  128. # Split text_line by commas, whitespace, and hyphens
  129. text_words = re.split(r'[,\s-]+', text_line.strip())
  130. # Get a string of word classes
  131. tw_classes = ''
  132. for word in text_words:
  133. if word in self.mandatory_number_words:
  134. tw_classes += 'm'
  135. elif word in self.allowed:
  136. tw_classes += 'a'
  137. else:
  138. tw_classes += 'w'
  139. # For each string of number words:
  140. last_end = 0
  141. for m in re.finditer('m[am]*m|m', tw_classes):
  142. # Get the number words
  143. num_words = ' '.join(text_words[m.start():m.end()])
  144. # Parse the number and store the value
  145. try:
  146. nums.append(self.parse_number(num_words))
  147. except ValueError:
  148. nums.append(-1)
  149. # Add words to t_numless
  150. t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
  151. last_end = m.end()
  152. t_numless += ' '.join(text_words[last_end:])
  153. return (t_numless.strip(), nums)
  154. if __name__ == '__main__':
  155. np = NumberParser()
  156. # Get the words to translate
  157. text_line = input('Enter a string: ')
  158. # Parse it to an integer
  159. value = np.parse_all_numbers(text_line)
  160. # Print the result
  161. print(value)