Somewhat fancy voice command recognition software
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

languageupdater.py 2.6KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. # This is part of Kaylee
  2. # -- this code is licensed GPLv3
  3. # Copyright 2015-2016 Clayton G. Hobbs
  4. # Portions Copyright 2013 Jezra
  5. import re
  6. import requests
  7. from hasher import Hasher
  8. class LanguageUpdater:
  9. def __init__(self, config):
  10. self.config = config
  11. self.hasher = Hasher(config)
  12. def update_language_if_changed(self):
  13. """Test if the language has changed, and if it has, update it"""
  14. if self.language_has_changed():
  15. self.update_language()
  16. self.save_language_hash()
  17. def language_has_changed(self):
  18. """Use hashes to test if the language has changed"""
  19. self.stored_hash = self.hasher['language']
  20. # Calculate the hash the language file has right now
  21. hasher = self.hasher.get_hash_object()
  22. with open(self.config.strings_file, 'rb') as sfile:
  23. buf = sfile.read()
  24. hasher.update(buf)
  25. self.new_hash = hasher.hexdigest()
  26. return self.new_hash != self.stored_hash
  27. def update_language(self):
  28. """Update the language using the online lmtool"""
  29. print('Updating language using online lmtool')
  30. host = 'http://www.speech.cs.cmu.edu'
  31. url = host + '/cgi-bin/tools/lmtool/run'
  32. # Submit the corpus to the lmtool
  33. response_text = ""
  34. with open(self.config.strings_file, 'rb') as corpus:
  35. files = {'corpus': corpus}
  36. values = {'formtype': 'simple'}
  37. r = requests.post(url, files=files, data=values)
  38. response_text = r.text
  39. # Parse response to get URLs of the files we need
  40. path_re = r'.*<title>Index of (.*?)</title>.*'
  41. number_re = r'.*TAR([0-9]*?)\.tgz.*'
  42. for line in response_text.split('\n'):
  43. # If we found the directory, keep it and don't break
  44. if re.search(path_re, line):
  45. path = host + re.sub(path_re, r'\1', line)
  46. # If we found the number, keep it and break
  47. elif re.search(number_re, line):
  48. number = re.sub(number_re, r'\1', line)
  49. break
  50. lm_url = path + '/' + number + '.lm'
  51. dic_url = path + '/' + number + '.dic'
  52. self._download_file(lm_url, self.config.lang_file)
  53. self._download_file(dic_url, self.config.dic_file)
  54. def save_language_hash(self):
  55. self.hasher['language'] = self.new_hash
  56. self.hasher.store()
  57. def _download_file(self, url, path):
  58. r = requests.get(url, stream=True)
  59. if r.status_code == 200:
  60. with open(path, 'wb') as f:
  61. for chunk in r:
  62. f.write(chunk)