View
55
Download
1
Category
Preview:
Citation preview
def mask_password(message, secret="***"):
    """Return ``message`` with password values swapped for ``secret``.

    :param message: The text that may contain security information.
    :param secret: Replacement written in place of each password value.
    :returns: The text form of ``message`` with password fields masked.
    """
    try:
        text = six.text_type(message)
    except UnicodeDecodeError:
        # The conversion can fail for byte strings; in that case keep
        # working with the object exactly as it was passed in.
        text = message

    # Cheap substring scan first: when none of the sensitive keys occurs
    # there is nothing to mask, so skip every regular expression.
    if all(key not in text for key in _SANITIZE_KEYS):
        return text

    # Patterns with a trailing group are applied before the patterns that
    # only capture the leading context.
    passes = (
        (_SANITIZE_PATTERNS_2, r'\g<1>' + secret + r'\g<2>'),
        (_SANITIZE_PATTERNS_1, r'\g<1>' + secret),
    )
    for regexes, replacement in passes:
        for regex in regexes:
            text = re.sub(regex, replacement, text)

    return text
def mask_password(message, secret="***"):
    """Return ``message`` with password values swapped for ``secret``.

    :param message: The text that may contain security information.
    :param secret: Replacement written in place of each password value.
    :returns: The text form of ``message`` with password fields masked.
    """
    try:
        text = six.text_type(message)
    except UnicodeDecodeError:
        # The conversion can fail for byte strings; in that case keep
        # working with the object exactly as it was passed in.
        text = message

    # Cheap substring scan first: when none of the sensitive keys occurs
    # there is nothing to mask, so skip every regular expression.
    if all(key not in text for key in _SANITIZE_KEYS):
        return text

    # Patterns with a trailing group are applied before the patterns that
    # only capture the leading context.
    passes = (
        (_SANITIZE_PATTERNS_2, r'\g<1>' + secret + r'\g<2>'),
        (_SANITIZE_PATTERNS_1, r'\g<1>' + secret),
    )
    for regexes, replacement in passes:
        for regex in regexes:
            text = re.sub(regex, replacement, text)

    return text
# Field names whose values are treated as secrets.
_SANITIZE_KEYS = [
    'adminPass',
    'admin_pass',
    'password',
    'admin_password',
    'auth_token',
    'new_pass',
    'auth_password',
    'secret_uuid',
]

# Templates are split by how many groups they capture, because the
# replacement string differs: one-group templates keep only the leading
# context, two-group templates also keep the trailing delimiter.
_FORMAT_PATTERNS_1 = [r'(%(key)s\s*[=]\s*)[^\s^\'^\"]+']
_FORMAT_PATTERNS_2 = [
    r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
    r'(%(key)s\s+[\"\']).*?([\"\'])',
    r'([-]{2}%(key)s\s+)[^\'^\"^=^\s]+([\s]*)',
    r'(<%(key)s>).*?(</%(key)s>)',
    r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
    r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
    r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?'
    '[\'"]).*?([\'"])',
    r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)',
]

# Expand every template for every key, so supporting a new key only means
# adding it to _SANITIZE_KEYS. The lists are ordered key by key, with the
# templates in declaration order inside each key.
_SANITIZE_PATTERNS_2 = [
    re.compile(template % {'key': key}, re.DOTALL)
    for key in _SANITIZE_KEYS
    for template in _FORMAT_PATTERNS_2
]
_SANITIZE_PATTERNS_1 = [
    re.compile(template % {'key': key}, re.DOTALL)
    for key in _SANITIZE_KEYS
    for template in _FORMAT_PATTERNS_1
]
def mask_password(message, secret="***"):
    """Return ``message`` with password values swapped for ``secret``.

    :param message: The text that may contain security information.
    :param secret: Replacement written in place of each password value.
    :returns: The text form of ``message`` with password fields masked.
    """
    try:
        text = six.text_type(message)
    except UnicodeDecodeError:
        # The conversion can fail for byte strings; in that case keep
        # working with the object exactly as it was passed in.
        text = message

    # Cheap substring scan first: when none of the sensitive keys occurs
    # there is nothing to mask, so skip every regular expression.
    if all(key not in text for key in _SANITIZE_KEYS):
        return text

    # Patterns with a trailing group are applied before the patterns that
    # only capture the leading context.
    passes = (
        (_SANITIZE_PATTERNS_2, r'\g<1>' + secret + r'\g<2>'),
        (_SANITIZE_PATTERNS_1, r'\g<1>' + secret),
    )
    for regexes, replacement in passes:
        for regex in regexes:
            text = re.sub(regex, replacement, text)

    return text
class timeit.Timer(stmt='pass', setup='pass', timer=<timer function>)
timeit(number=1000000)
# Time a single substitution of one compiled pattern over the payload.
# The setup code is generated with %r so the payload and the pattern are
# embedded as exact Python literals (quotes and backslashes survive), and
# the pattern is recompiled with the same flags as the one under test.
t = timeit.Timer(
    stmt=r"re.sub(pattern, r'\g<1>***\g<2>', payload)",
    setup=(
        "import re\n"
        "payload = %r\n"
        "pattern = re.compile(%r, %d)\n"
    ) % (input_str, pattern.pattern, pattern.flags),
)
result = t.timeit(1)
#!/usr/bin/env python
from __future__ import print_function
import timeit
import strutils
# A moderately sized input (~50K) string
# http://paste.openstack.org/raw/155864/
infile = 'sample.json'

with open(infile, 'r') as f:
    input_str = f.read()
print('payload has %d bytes' % len(input_str))

# (elapsed seconds, pattern source) for every pattern that was timed.
times = []

for pattern in strutils._SANITIZE_PATTERNS_2:
    print('\ntesting %s' % pattern.pattern)
    # %r embeds the payload and the pattern as exact literals, so quotes or
    # backslashes in the sample cannot corrupt the generated setup code.
    # The flags are passed along so the timed regex is the one strutils
    # really uses (it is compiled with re.DOTALL there).
    t = timeit.Timer(
        r"re.sub(pattern, r'\g<1>***\g<2>', payload)",
        "import re\n"
        "payload = %r\n"
        "pattern = re.compile(%r, %d)\n"
        % (input_str, pattern.pattern, pattern.flags))
    result = t.timeit(1)
    print(result)
    times.append((result, pattern.pattern))

print('\nslowest to fastest:')
for entry in sorted(times, reverse=True):
    print('%s - %s' % entry)

print('\ntesting all patterns')
t = timeit.Timer(
    "strutils.mask_password(%r)" % (input_str,),
    "import strutils",
)
print(t.timeit(1))
$ python perf_test_mask_password.py
payload has 50000 bytes
testing (adminPass\s*[=]\s*[\"\']).*?([\"\'])
0.000424146652222
testing (adminPass\s+[\"\']).*?([\"\'])
0.000438928604126
testing ([-]{2}adminPass\s+)[^\'^\"^=^\s]+([\s]*)
0.000957012176514
testing (<adminPass>).*?(</adminPass>)
0.000320911407471
testing ([\"\']adminPass[\"\']\s*:\s*[\"\']).*?([\"\'])
0.000932931900024
testing ([\'"].*?adminPass[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.39579486847
testing ([\'"].*?adminPass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.38312697411
testing (adminPass\s*--?[A-z]+\s*)\S+(\s*)
0.000363111495972
testing (admin_pass\s*[=]\s*[\"\']).*?([\"\'])
0.000358819961548
testing (admin_pass\s+[\"\']).*?([\"\'])
0.000358104705811
1.38247299194 - ([\'"].*?adminPass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.36443901062 - ([\'"].*?auth_password[\'"]\s*:\s*u?[\'"]).*?([\'"])1.36225605011 - ([\'"].*?auth_token[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.35945916176 - ([\'"].*?admin_pass[\'"]\s*:\s*u?[\'"]).*?([\'"])1.358741045 - ([\'"].*?admin_password[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.35629177094 - ([\'"].*?admin_password[\'"]\s*:\s*u?[\'"]).*?([\'"])1.35611796379 - ([\'"].*?new_pass[\'"]\s*:\s*u?[\'"]).*?([\'"])1.35091495514 - ([\'"].*?auth_password[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.34805893898 - ([\'"].*?new_pass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.34525895119 - ([\'"].*?admin_pass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.3426618576 - ([\'"].*?adminPass[\'"]\s*:\s*u?[\'"]).*?([\'"])1.33739089966 - ([\'"].*?secret_uuid[\'"]\s*:\s*u?[\'"]).*?([\'"])1.31182909012 - ([\'"].*?secret_uuid[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.30403113365 - ([\'"].*?auth_token[\'"]\s*:\s*u?[\'"]).*?([\'"])1.29068493843 - ([\'"].*?password[\'"]\s*:\s*u?[\'"]).*?([\'"])1.28651189804 - ([\'"].*?password[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])0.00123500823975 - ([\"\']password[\"\']\s*:\s*[\"\']).*?([\"\'])0.00123286247253 - ([\"\']adminPass[\"\']\s*:\s*[\"\']).*?([\"\'])
$ cat results
1.38247299194 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.36443901062 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])1.36225605011 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.35945916176 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])1.358741045 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.35629177094 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])1.35611796379 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])1.35091495514 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.34805893898 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.34525895119 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.3426618576 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])1.33739089966 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])1.31182909012 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])1.30403113365 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])1.29068493843 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])1.28651189804 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
$ cat results | sed -E 's/^.{16}/ /g'
([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
$ cat results | sed -E 's/^.{16}/ /g' | sort
([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
$ cat results | sed -E 's/^.{16}/ /g' | sort -u
([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
input:  "admin_pass": "super secret"
output: "admin_pass": "***"
(            start group
[\'"]        quote
.*?          anything
KEY          literal key
[\'"]        quote
\s*:\s*      colon, maybe with whitespace
u?           optional literal u
[\'"]        quote
)            end group
.*?          anything
([\'"])      group with quote
group 1: "admin_pass": "
group 2: "
([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
input:  "admin_pass": "super secret"
output: "admin_pass": "***"
(            start group
[\'"]        quote
.*?          anything
KEY          literal key
[\'"]        quote
\s*:\s*      colon, maybe with whitespace
u?           optional literal u
[\'"]        quote
)            end group
.*?          anything
([\'"])      group with quote
group 1: "admin_pass": "
group 2: "
(")(.*?)(")
"super secret"
"
"s
"su
"sup
"supe
"super
"super 
"super s
"super se
"super sec
"super secr
"super secret
"super secret
"super secret"
(")(.*)(")
"super secret"
"super secret"
"super secret
"super secret"
Before:
testing all patterns
22.2821819782
After:
testing all patterns
0.0565450191498
22.28 / 0.0565 = 394.3
def mask_password(message, secret="***"):
    """Return ``message`` with password values swapped for ``secret``.

    :param message: The text that may contain security information.
    :param secret: Replacement written in place of each password value.
    :returns: The text form of ``message`` with password fields masked.
    """
    try:
        text = six.text_type(message)
    except UnicodeDecodeError:
        # The conversion can fail for byte strings; in that case keep
        # working with the object exactly as it was passed in.
        text = message

    # Cheap substring scan first: when none of the sensitive keys occurs
    # there is nothing to mask, so skip every regular expression.
    if all(key not in text for key in _SANITIZE_KEYS):
        return text

    # Patterns with a trailing group are applied before the patterns that
    # only capture the leading context.
    passes = (
        (_SANITIZE_PATTERNS_2, r'\g<1>' + secret + r'\g<2>'),
        (_SANITIZE_PATTERNS_1, r'\g<1>' + secret),
    )
    for regexes, replacement in passes:
        for regex in regexes:
            text = re.sub(regex, replacement, text)

    return text
#!/usr/bin/env python
from __future__ import print_function
import timeit
infile = 'sample.json'

with open(infile, 'r') as f:
    input_str = f.read()

# Regex scan for the bare key. The pattern has no capture groups, so the
# replacement must not reference \g<1>/\g<2>: such a template is rejected
# with "invalid group reference". The payload is embedded with %r so quotes
# and backslashes in the sample survive unchanged.
t = timeit.Timer(
    "re.sub(pattern, '***', payload)",
    "import re\n"
    "payload = %r\n"
    "pattern = re.compile(r'admin_pass')\n"
    % (input_str,))
print('regex : %10.10f' % t.timeit(5))

# Plain substring test on the same payload, for comparison.
t = timeit.Timer(
    "'admin_pass' in payload",
    "payload = %r" % (input_str,),
)
print('literal: %10.10f' % t.timeit(5))
def mask_password(message, secret="***"):
    """Return ``message`` with password values swapped for ``secret``.

    :param message: The text that may contain security information.
    :param secret: Replacement written in place of each password value.
    :returns: The text form of ``message`` with password fields masked.
    """
    try:
        text = six.text_type(message)
    except UnicodeDecodeError:
        # The conversion can fail for byte strings; in that case keep
        # working with the object exactly as it was passed in.
        text = message

    one_group = r'\g<1>' + secret
    two_groups = r'\g<1>' + secret + r'\g<2>'

    # Only run the regular expressions that belong to keys which actually
    # occur in the text; the substring test is far cheaper than the
    # substitutions it avoids.
    for key in _SANITIZE_KEYS:
        if key not in text:
            continue
        for regex in _SANITIZE_PATTERNS_2[key]:
            text = re.sub(regex, two_groups, text)
        for regex in _SANITIZE_PATTERNS_1[key]:
            text = re.sub(regex, one_group, text)

    return text
# Compiled patterns are generated from _SANITIZE_KEYS, so supporting a new
# key only requires adding it to that list. They are stored per key so that
# only the substitutions for keys found by a quick "in" test need to run.
_SANITIZE_PATTERNS_2 = {}
_SANITIZE_PATTERNS_1 = {}

for key in _SANITIZE_KEYS:
    fill = {'key': key}
    _SANITIZE_PATTERNS_2[key] = [
        re.compile(template % fill, re.DOTALL)
        for template in _FORMAT_PATTERNS_2
    ]
    _SANITIZE_PATTERNS_1[key] = [
        re.compile(template % fill, re.DOTALL)
        for template in _FORMAT_PATTERNS_1
    ]
>>> import strutils
>>> strutils._SANITIZE_PATTERNS_2
{'admin_pass': [<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, <_sre.SRE_Pattern object at 0x7fbf43620660>, <_sre.SRE_Pattern object at 0x7fbf43621200>, <_sre.SRE_Pattern object at 0x7fbf43621ac0>, <_sre.SRE_Pattern object at 0x7fbf43627e70>, <_sre.SRE_Pattern object at 0x7fbf43629a30>, <_sre.SRE_Pattern object at 0x7fbf4362a3f0>, <_sre.SRE_Pattern object at 0x7fbf43629fc0>], 'auth_password': [<_sre.SRE_Pattern object at 0x7fbf436322d0>, <_sre.SRE_Pattern object at 0x7fbf436324a0>, <_sre.SRE_Pattern object at 0x7fbf43632640>, <_sre.SRE_Pattern object at 0x7fbf43632800>, <_sre.SRE_Pattern object at 0x7fbf43631d60>, <_sre.SRE_Pattern object at 0x7fbf43633200>, <_sre.SRE_Pattern object at 0x7fbf43633690>, <_sre.SRE_Pattern object at 0x7fbf43633e90>], …}
>>> strutils._SANITIZE_PATTERNS_2.keys()
['admin_pass', 'auth_password', 'adminPass', 'auth_token', 'secret_uuid', 'admin_password', 'new_pass', 'password']
>>> strutils._SANITIZE_PATTERNS_2['admin_pass']
[<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, …]
# (elapsed seconds, pattern source) for every pattern that was timed.
times = []

for pattern in strutils._SANITIZE_PATTERNS_2['admin_pass']:
    print('\ntesting %s' % pattern.pattern)
    # %r embeds the payload and the pattern as exact literals, so quotes or
    # backslashes in the sample cannot corrupt the generated setup code.
    # The flags are passed along so the timed regex matches the compiled
    # pattern taken from strutils.
    t = timeit.Timer(
        r"re.sub(pattern, r'\g<1>***\g<2>', payload)",
        "import re\n"
        "payload = %r\n"
        "pattern = re.compile(%r, %d)\n"
        % (input_str, pattern.pattern, pattern.flags))
    result = t.timeit(1)
    print(result)
    times.append((result, pattern.pattern))

print('\nslowest to fastest:')
for entry in sorted(times, reverse=True):
    print('%s - %s ' % entry)

print('\ntesting all patterns')
t = timeit.Timer(
    "strutils.mask_password(%r)" % (input_str,),
    "import strutils",
)
print(t.timeit(1))
$ python ./perf_test_mask_password.py
payload has 50000 bytes
testing (admin_pass\s*[=]\s*[\"\'])[^\"\']*([\"\'])
0.000396966934204
testing (admin_pass\s+[\"\'])[^\"\']*([\"\'])
0.000360012054443
testing ([-]{2}admin_pass\s+)[^\'^\"^=^\s]+([\s]*)
0.00097393989563
testing (<admin_pass>)[^<]*(</admin_pass>)
0.00052809715271
testing ([\"\']admin_pass[\"\']\s*:\s*[\"\'])[^\"\']*([\"\'])
0.00141596794128
testing ([\'"][^"\']*admin_pass[\'"]\s*:\s*u?[\'"])[^\"\']*(['"])
0.00194191932678
testing ([\'"][^\'"]*admin_pass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"])[^"']*(['"])
0.00175809860229
testing (admin_pass\s*--?[A-z]+\s*)\S+(\s*)
0.000356912612915
slowest to fastest:
0.00194191932678 - ([\'"][^"\']*admin_pass[\'"]\s*:\s*u?[\'"])[^\"\']*(['"])
0.00175809860229 - ([\'"][^\'"]*admin_pass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"])[^"']*(['"])
0.00141596794128 - ([\"\']admin_pass[\"\']\s*:\s*[\"\'])[^\"\']*([\"\'])
0.00097393989563 - ([-]{2}admin_pass\s+)[^\'^\"^=^\s]+([\s]*)
0.00052809715271 - (<admin_pass>)[^<]*(</admin_pass>)
0.000396966934204 - (admin_pass\s*[=]\s*[\"\'])[^\"\']*([\"\'])
0.000360012054443 - (admin_pass\s+[\"\'])[^\"\']*([\"\'])
0.000356912612915 - (admin_pass\s*--?[A-z]+\s*)\S+(\s*)
testing all patterns
0.00732088088989
References• https://github.com/dhellmann/pyatl-regex-performance
• https://review.openstack.org/#/c/145562/
• http://git.openstack.org/cgit/openstack/oslo.utils/tree/oslo_utils/strutils.py
• https://docs.python.org/2.7/library/re.html
• https://docs.python.org/2/howto/regex.html#greedy-versus-non-greedy
• https://docs.python.org/2.7/library/timeit.html
• Mastering Regular Expressions, Jeffrey E.F. Friedl
Recommended