Somewhat fancy voice command recognition software
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

languageupdater.py 2.8KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. # This is part of Kaylee
  2. # -- this code is licensed GPLv3
  3. # Copyright 2013 Jezra
  4. # Copyright 2015 Clayton G. Hobbs
  5. import hashlib
  6. import json
  7. import re
  8. import requests
  9. class LanguageUpdater:
  10. def __init__(self, config):
  11. self.config = config
  12. def update_language_if_changed(self):
  13. """Test if the language has changed, and if it has, update it"""
  14. if self.language_has_changed():
  15. self.update_language()
  16. self.save_language_hash()
  17. def language_has_changed(self):
  18. """Use SHA256 hashes to test if the language has changed"""
  19. # Load the stored hash from the hash file
  20. try:
  21. with open(self.config.hash_file, 'r') as f:
  22. hashes = json.load(f)
  23. self.stored_hash = hashes['language']
  24. except (IOError, KeyError, TypeError):
  25. # No stored hash
  26. self.stored_hash = ''
  27. # Calculate the hash the language file has right now
  28. hasher = hashlib.sha256()
  29. with open(self.config.strings_file, 'rb') as sfile:
  30. buf = sfile.read()
  31. hasher.update(buf)
  32. self.new_hash = hasher.hexdigest()
  33. return self.new_hash != self.stored_hash
  34. def update_language(self):
  35. """Update the language using the online lmtool"""
  36. print('Updating language using online lmtool')
  37. host = 'http://www.speech.cs.cmu.edu'
  38. url = host + '/cgi-bin/tools/lmtool/run'
  39. # Prepare request
  40. files = {'corpus': open(self.config.strings_file, 'rb')}
  41. values = {'formtype': 'simple'}
  42. # Send corpus to the server
  43. r = requests.post(url, files=files, data=values)
  44. # Parse response to get URLs of the files we need
  45. path_re = r'.*<title>Index of (.*?)</title>.*'
  46. number_re = r'.*TAR[0-9]*?\.tgz.*'
  47. for line in r.text.split('\n'):
  48. # If we found the directory, keep it and don't break
  49. if re.search(path_re, line):
  50. path = host + re.sub(path_re, r'\1', line)
  51. # If we found the number, keep it and break
  52. elif re.search(number_re, line):
  53. number = re.sub(number_re, r'\1', line)
  54. break
  55. lm_url = path + '/' + number + '.lm'
  56. dic_url = path + '/' + number + '.dic'
  57. self._download_file(lm_url, self.config.lang_file)
  58. self._download_file(dic_url, self.config.dic_file)
  59. def save_language_hash(self):
  60. new_hashes = {'language': self.new_hash}
  61. with open(self.config.hash_file, 'w') as f:
  62. json.dump(new_hashes, f)
  63. def _download_file(self, url, path):
  64. r = requests.get(url, stream=True)
  65. if r.status_code == 200:
  66. with open(path, 'wb') as f:
  67. for chunk in r:
  68. f.write(chunk)