Somewhat fancy voice command recognition software
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

util.py 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. # This is part of Kaylee
  2. # -- this code is licensed GPLv3
  3. # Copyright 2015-2017 Clayton G. Hobbs
  4. # Portions Copyright 2013 Jezra
  5. import re
  6. import json
  7. import hashlib
  8. import os
  9. from argparse import ArgumentParser, Namespace
  10. from collections import OrderedDict
  11. import requests
  12. from gi.repository import GLib
  13. class Config:
  14. """Keep track of the configuration of Kaylee"""
  15. # Name of the program, for later use
  16. program_name = "kaylee"
  17. # Directories
  18. conf_dir = os.path.join(GLib.get_user_config_dir(), program_name)
  19. cache_dir = os.path.join(GLib.get_user_cache_dir(), program_name)
  20. data_dir = os.path.join(GLib.get_user_data_dir(), program_name)
  21. # Configuration files
  22. opt_file = os.path.join(conf_dir, "options.json")
  23. plugins_file = os.path.join(conf_dir, "plugins.json")
  24. # Cache files
  25. history_file = os.path.join(cache_dir, program_name + "history")
  26. hash_file = os.path.join(cache_dir, "hash.json")
  27. # Data files
  28. strings_file = os.path.join(data_dir, "sentences.corpus")
  29. lang_file = os.path.join(data_dir, 'lm')
  30. dic_file = os.path.join(data_dir, 'dic')
  31. def __init__(self):
  32. # Ensure necessary directories exist
  33. self._make_dir(self.conf_dir)
  34. self._make_dir(self.cache_dir)
  35. self._make_dir(self.data_dir)
  36. # Set up the argument parser
  37. self._parser = ArgumentParser()
  38. self._parser.add_argument("-i", "--interface", type=str,
  39. dest="interface", action='store',
  40. help="Interface to use (if any). 'g' for GTK or 'gt' for GTK" +
  41. " system tray icon")
  42. self._parser.add_argument("-c", "--continuous",
  43. action="store_true", dest="continuous", default=False,
  44. help="Start interface with 'continuous' listen enabled")
  45. self._parser.add_argument("-H", "--history", type=int,
  46. action="store", dest="history",
  47. help="Number of commands to store in history file")
  48. self._parser.add_argument("-m", "--microphone", type=int,
  49. action="store", dest="microphone", default=None,
  50. help="Audio input card to use (if other than system default)")
  51. self._parser.add_argument("--valid-sentence-command", type=str,
  52. dest="valid_sentence_command", action='store',
  53. help="Command to run when a valid sentence is detected")
  54. self._parser.add_argument("--invalid-sentence-command", type=str,
  55. dest="invalid_sentence_command", action='store',
  56. help="Command to run when an invalid sentence is detected")
  57. # Read the configuration file
  58. self._read_options_file()
  59. # Parse command-line arguments, overriding config file as appropriate
  60. self._parser.parse_args(namespace=self.options)
  61. # Read the plugins file
  62. self._read_plugins_file()
  63. def _make_dir(self, directory):
  64. if not os.path.exists(directory):
  65. os.makedirs(directory)
  66. def _read_options_file(self):
  67. try:
  68. with open(self.opt_file, 'r') as f:
  69. self.options = json.load(f, object_pairs_hook=OrderedDict)
  70. self.options = Namespace(**self.options)
  71. except FileNotFoundError:
  72. # Make an empty options namespace
  73. self.options = Namespace()
  74. def _read_plugins_file(self):
  75. try:
  76. with open(self.plugins_file, 'r') as f:
  77. self.plugins = json.load(f, object_pairs_hook=OrderedDict)
  78. except FileNotFoundError:
  79. self.plugins = None
  80. class Hasher:
  81. """Keep track of hashes for Kaylee"""
  82. def __init__(self, config):
  83. self.config = config
  84. try:
  85. with open(self.config.hash_file, 'r') as f:
  86. self.hashes = json.load(f)
  87. except IOError:
  88. # No stored hash
  89. self.hashes = {}
  90. def __getitem__(self, hashname):
  91. try:
  92. return self.hashes[hashname]
  93. except (KeyError, TypeError):
  94. return None
  95. def __setitem__(self, hashname, value):
  96. self.hashes[hashname] = value
  97. def get_hash_object(self):
  98. """Returns an object to compute a new hash"""
  99. return hashlib.sha256()
  100. def store(self):
  101. """Store the current hashes into a the hash file"""
  102. with open(self.config.hash_file, 'w') as f:
  103. json.dump(self.hashes, f)
  104. class LanguageUpdater:
  105. """
  106. Handles updating the language using the online lmtool.
  107. This class provides methods to check if the corpus has changed, and to
  108. update the language to match the new corpus using the lmtool. This allows
  109. us to automatically update the language if the corpus has changed, saving
  110. the user from having to do this manually.
  111. """
  112. def __init__(self, config):
  113. self.config = config
  114. self.hasher = Hasher(config)
  115. def update_language_if_changed(self):
  116. """Test if the language has changed, and if it has, update it"""
  117. if self.language_has_changed():
  118. self.update_language()
  119. self.save_language_hash()
  120. def language_has_changed(self):
  121. """Use hashes to test if the language has changed"""
  122. self.stored_hash = self.hasher['language']
  123. # Calculate the hash the language file has right now
  124. hasher = self.hasher.get_hash_object()
  125. with open(self.config.strings_file, 'rb') as sfile:
  126. buf = sfile.read()
  127. hasher.update(buf)
  128. self.new_hash = hasher.hexdigest()
  129. return self.new_hash != self.stored_hash
  130. def update_language(self):
  131. """Update the language using the online lmtool"""
  132. print('Updating language using online lmtool')
  133. host = 'http://www.speech.cs.cmu.edu'
  134. url = host + '/cgi-bin/tools/lmtool/run'
  135. # Submit the corpus to the lmtool
  136. response_text = ""
  137. with open(self.config.strings_file, 'rb') as corpus:
  138. files = {'corpus': corpus}
  139. values = {'formtype': 'simple'}
  140. r = requests.post(url, files=files, data=values)
  141. response_text = r.text
  142. # Parse response to get URLs of the files we need
  143. path_re = r'.*<title>Index of (.*?)</title>.*'
  144. number_re = r'.*TAR([0-9]*?)\.tgz.*'
  145. for line in response_text.split('\n'):
  146. # If we found the directory, keep it and don't break
  147. if re.search(path_re, line):
  148. path = host + re.sub(path_re, r'\1', line)
  149. # If we found the number, keep it and break
  150. elif re.search(number_re, line):
  151. number = re.sub(number_re, r'\1', line)
  152. break
  153. lm_url = path + '/' + number + '.lm'
  154. dic_url = path + '/' + number + '.dic'
  155. self._download_file(lm_url, self.config.lang_file)
  156. self._download_file(dic_url, self.config.dic_file)
  157. def save_language_hash(self):
  158. self.hasher['language'] = self.new_hash
  159. self.hasher.store()
  160. def _download_file(self, url, path):
  161. r = requests.get(url, stream=True)
  162. if r.status_code == 200:
  163. with open(path, 'wb') as f:
  164. for chunk in r:
  165. f.write(chunk)