36
Regexes and Performance Testing Doug Hellmann PyATL May 2015

Regexes and-performance-testing

Embed Size (px)

Citation preview

Regexes and Performance Testing

Doug Hellmann PyATL May 2015

def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords. It is spliced
                   into the ``re.sub`` replacement template, so a
                   backslash in it is read as a template escape.
    :returns: The unicode value of message with the password fields masked.
    """

    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Two-group patterns: keep the text captured on both sides of the value.
    substitute = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS_2:
        message = re.sub(pattern, substitute, message)

    # One-group patterns: keep only the captured prefix before the value.
    substitute = r'\g<1>' + secret
    for pattern in _SANITIZE_PATTERNS_1:
        message = re.sub(pattern, substitute, message)

    return message

> 3000 times faster

def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords. It is spliced
                   into the ``re.sub`` replacement template, so a
                   backslash in it is read as a template escape.
    :returns: The unicode value of message with the password fields masked.
    """

    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Two-group patterns: keep the text captured on both sides of the value.
    substitute = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS_2:
        message = re.sub(pattern, substitute, message)

    # One-group patterns: keep only the captured prefix before the value.
    substitute = r'\g<1>' + secret
    for pattern in _SANITIZE_PATTERNS_1:
        message = re.sub(pattern, substitute, message)

    return message

# Keys whose values must be masked wherever they appear in a message.
_SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password',
                  'auth_token', 'new_pass', 'auth_password', 'secret_uuid']

# NOTE(ldbragst): Let's build a list of regex objects using the list of
# _SANITIZE_KEYS we already have. This way, we only have to add the new key
# to the list of _SANITIZE_KEYS and we can generate regular expressions
# for XML and JSON automatically.
_SANITIZE_PATTERNS_2 = []
_SANITIZE_PATTERNS_1 = []

# NOTE(amrith): Some regular expressions have only one parameter, some
# have two parameters. Use different lists of patterns here.
_FORMAT_PATTERNS_1 = [r'(%(key)s\s*[=]\s*)[^\s^\'^\"]+']
_FORMAT_PATTERNS_2 = [r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
                      r'(%(key)s\s+[\"\']).*?([\"\'])',
                      r'([-]{2}%(key)s\s+)[^\'^\"^=^\s]+([\s]*)',
                      r'(<%(key)s>).*?(</%(key)s>)',
                      r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
                      r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
                      r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?'
                      '[\'"]).*?([\'"])',
                      r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)']

# Expand every template once per key, so the flat lists end up holding
# len(_SANITIZE_KEYS) compiled patterns for each template.
for key in _SANITIZE_KEYS:
    for pattern in _FORMAT_PATTERNS_2:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS_2.append(reg_ex)

    for pattern in _FORMAT_PATTERNS_1:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS_1.append(reg_ex)

Bottleneck?

def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords. It is spliced
                   into the ``re.sub`` replacement template, so a
                   backslash in it is read as a template escape.
    :returns: The unicode value of message with the password fields masked.
    """

    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Two-group patterns: keep the text captured on both sides of the value.
    substitute = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS_2:
        message = re.sub(pattern, substitute, message)

    # One-group patterns: keep only the captured prefix before the value.
    substitute = r'\g<1>' + secret
    for pattern in _SANITIZE_PATTERNS_1:
        message = re.sub(pattern, substitute, message)

    return message

class timeit.Timer(stmt='pass', setup='pass', timer=<timer function>)

timeit(number=1000000)

# Time a single substitution of one compiled pattern against the payload.
# The payload and the pattern source are pasted into the setup code so the
# timed statement only measures re.sub() itself.
t = timeit.Timer(
    # Raw literal: "\g" is not a valid escape in a normal string literal.
    stmt=r"re.sub(pattern, r'\g<1>***\g<2>', payload)",
    setup="""import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
result = t.timeit(1)

#!/usr/bin/env python

from __future__ import print_function

import timeit

import strutils

# A moderately sized input (~50K) string
# http://paste.openstack.org/raw/155864/
infile = 'sample.json'

with open(infile, 'r') as f:
    input_str = f.read()
print('payload has %d bytes' % len(input_str))

# (elapsed seconds, pattern source) for every two-group pattern.
times = []

for pattern in strutils._SANITIZE_PATTERNS_2:
    print('\ntesting %s' % pattern.pattern)
    # The pattern is rebuilt from its source text inside the timer's setup
    # code, so only the re.sub() call itself is timed.
    # NOTE(review): the rebuilt pattern is compiled without flags - confirm
    # that matches how strutils compiles it.
    t = timeit.Timer(
        r"re.sub(pattern, r'\g<1>***\g<2>', payload)",
        """import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
    result = t.timeit(1)
    print(result)
    times.append((result, pattern.pattern))

print('\nslowest to fastest:')
times = reversed(sorted(times))
for t in times:
    print('%s - %s' % t)

# Finally time the public entry point, which applies every pattern.
print('\ntesting all patterns')
t = timeit.Timer(
    "strutils.mask_password('''" + input_str + "''')",
    "import strutils",
)
print(t.timeit(1))

$ python perf_test_mask_password.py
payload has 50000 bytes

testing (adminPass\s*[=]\s*[\"\']).*?([\"\'])
0.000424146652222

testing (adminPass\s+[\"\']).*?([\"\'])
0.000438928604126

testing ([-]{2}adminPass\s+)[^\'^\"^=^\s]+([\s]*)
0.000957012176514

testing (<adminPass>).*?(</adminPass>)
0.000320911407471

testing ([\"\']adminPass[\"\']\s*:\s*[\"\']).*?([\"\'])
0.000932931900024

testing ([\'"].*?adminPass[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.39579486847

testing ([\'"].*?adminPass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.38312697411

testing (adminPass\s*--?[A-z]+\s*)\S+(\s*)
0.000363111495972

testing (admin_pass\s*[=]\s*[\"\']).*?([\"\'])
0.000358819961548

testing (admin_pass\s+[\"\']).*?([\"\'])
0.000358104705811

1.38247299194 - ([\'"].*?adminPass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.36443901062 - ([\'"].*?auth_password[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.36225605011 - ([\'"].*?auth_token[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.35945916176 - ([\'"].*?admin_pass[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.358741045 - ([\'"].*?admin_password[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.35629177094 - ([\'"].*?admin_password[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.35611796379 - ([\'"].*?new_pass[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.35091495514 - ([\'"].*?auth_password[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.34805893898 - ([\'"].*?new_pass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.34525895119 - ([\'"].*?admin_pass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.3426618576 - ([\'"].*?adminPass[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.33739089966 - ([\'"].*?secret_uuid[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.31182909012 - ([\'"].*?secret_uuid[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.30403113365 - ([\'"].*?auth_token[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.29068493843 - ([\'"].*?password[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.28651189804 - ([\'"].*?password[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
0.00123500823975 - ([\"\']password[\"\']\s*:\s*[\"\']).*?([\"\'])
0.00123286247253 - ([\"\']adminPass[\"\']\s*:\s*[\"\']).*?([\"\'])

$ cat results

1.38247299194 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.36443901062 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.36225605011 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.35945916176 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.358741045 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.35629177094 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.35611796379 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.35091495514 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.34805893898 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.34525895119 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.3426618576 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.33739089966 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.31182909012 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
1.30403113365 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.29068493843 - ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])
1.28651189804 - ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])

$ cat results | sed -E 's/^.{16}/ /g'

([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])

$ cat results | sed -E 's/^.{16}/ /g' | sort

([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"]) ([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])

$ cat results | sed -E 's/^.{16}/ /g' | sort -u

([\'"].*?KEY[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"]).*?(['"])
([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])

([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])

"admin_pass": "super secret"
"admin_pass": "***"

(           start group
[\'"]       quote
.*?         anything
KEY         literal key
[\'"]       quote
\s*:\s*     colon, maybe with whitespace
u?          optional literal u
[\'"]       quote
)           end group
.*?         anything
([\'"])     group with quote

group 1: "admin_pass": "
group 2: "

([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])

"admin_pass": "super secret"
"admin_pass": "***"

(           start group
[\'"]       quote
.*?         anything
KEY         literal key
[\'"]       quote
\s*:\s*     colon, maybe with whitespace
u?          optional literal u
[\'"]       quote
)           end group
.*?         anything
([\'"])     group with quote

group 1: "admin_pass": "
group 2: "

(”)(.*?)(”)

“super secret”

“
“s
“su
“sup
“supe
“super
“super 
“super s
“super se
“super sec
“super secr
“super secret
“super secret
“super secret”

(“)(.*)(”)

“super secret”

“super secret”
“super secret
“super secret”

Change 1: Be Specific In Patterns

([\'"].*?KEY[\'"]\s*:\s*u?[\'"]).*?([\'"])

([\'"][^"\']*KEY[\'"]\s*:\s*u?[\'"])[^\"\']*([\'"])

Before:

testing all patterns
22.2821819782

After:

testing all patterns
0.0565450191498

22.28 / 0.0565 = 394.3

def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords. It is spliced
                   into the ``re.sub`` replacement template, so a
                   backslash in it is read as a template escape.
    :returns: The unicode value of message with the password fields masked.
    """

    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    if not any(key in message for key in _SANITIZE_KEYS):
        return message

    # Two-group patterns: keep the text captured on both sides of the value.
    substitute = r'\g<1>' + secret + r'\g<2>'
    for pattern in _SANITIZE_PATTERNS_2:
        message = re.sub(pattern, substitute, message)

    # One-group patterns: keep only the captured prefix before the value.
    substitute = r'\g<1>' + secret
    for pattern in _SANITIZE_PATTERNS_1:
        message = re.sub(pattern, substitute, message)

    return message

8 keys * 9 templates (8 two-group + 1 one-group) = 72 patterns

#!/usr/bin/env python

from __future__ import print_function

import timeit

infile = 'sample.json'

with open(infile, 'r') as f:
    input_str = f.read()

# Cost of scanning the payload with a compiled regex for one literal key.
# NOTE(review): the pattern 'admin_pass' defines no groups while the
# template references groups 1 and 2; newer Python versions may reject
# that with an "invalid group reference" error - confirm before rerunning.
t = timeit.Timer(
    r"re.sub(pattern, r'\g<1>***\g<2>', payload)",
    """import re
payload = '''%s'''
pattern = re.compile(r'admin_pass')
""" % input_str)
print('regex : %10.10f' % t.timeit(5))

# Cost of the plain substring test for the same key.
t = timeit.Timer(
    "'admin_pass' in payload",
    "payload = '''%s'''" % input_str,
)
print('literal: %10.10f' % t.timeit(5))

$ python literal_vs_regex.py
regex : 0.0003869534
literal: 0.0001130104

Change 2: Look at Fewer Patterns

def mask_password(message, secret="***"):
    """Replace password with 'secret' in message.

    :param message: The string which includes security information.
    :param secret: value with which to replace passwords. It is spliced
                   into the ``re.sub`` replacement template, so a
                   backslash in it is read as a template escape.
    :returns: The unicode value of message with the password fields masked.
    """

    try:
        message = six.text_type(message)
    except UnicodeDecodeError:
        # NOTE(jecarey): Temporary fix to handle cases where message is a
        # byte string. A better solution will be provided in Kilo.
        pass

    # Build both replacement templates once, ahead of the per-key loop.
    substitute1 = r'\g<1>' + secret
    substitute2 = r'\g<1>' + secret + r'\g<2>'

    # NOTE(ldbragst): Check to see if anything in message contains any key
    # specified in _SANITIZE_KEYS, if not then just return the message since
    # we don't have to mask any passwords.
    for key in _SANITIZE_KEYS:
        # Cheap substring test first: only the patterns generated for keys
        # that occur in the message are applied.
        if key in message:
            for pattern in _SANITIZE_PATTERNS_2[key]:
                message = re.sub(pattern, substitute2, message)
            for pattern in _SANITIZE_PATTERNS_1[key]:
                message = re.sub(pattern, substitute1, message)

    return message

# NOTE(ldbragst): Let's build a list of regex objects using the list of
# _SANITIZE_KEYS we already have. This way, we only have to add the new key
# to the list of _SANITIZE_KEYS and we can generate regular expressions
# for XML and JSON automatically.
_SANITIZE_PATTERNS_2 = {}
_SANITIZE_PATTERNS_1 = {}

# NOTE(dhellmann): Keep a separate list of patterns by key so we only
# need to apply the substitutions for keys we find using a quick "in"
# test.
for key in _SANITIZE_KEYS:
    _SANITIZE_PATTERNS_1[key] = []
    _SANITIZE_PATTERNS_2[key] = []

    for pattern in _FORMAT_PATTERNS_2:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS_2[key].append(reg_ex)

    for pattern in _FORMAT_PATTERNS_1:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS_1[key].append(reg_ex)

>>> import strutils
>>> strutils._SANITIZE_PATTERNS_2

{'admin_pass': [<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, <_sre.SRE_Pattern object at 0x7fbf43620660>, <_sre.SRE_Pattern object at 0x7fbf43621200>, <_sre.SRE_Pattern object at 0x7fbf43621ac0>, <_sre.SRE_Pattern object at 0x7fbf43627e70>, <_sre.SRE_Pattern object at 0x7fbf43629a30>, <_sre.SRE_Pattern object at 0x7fbf4362a3f0>, <_sre.SRE_Pattern object at 0x7fbf43629fc0>], 'auth_password': [<_sre.SRE_Pattern object at 0x7fbf436322d0>, <_sre.SRE_Pattern object at 0x7fbf436324a0>, <_sre.SRE_Pattern object at 0x7fbf43632640>, <_sre.SRE_Pattern object at 0x7fbf43632800>, <_sre.SRE_Pattern object at 0x7fbf43631d60>, <_sre.SRE_Pattern object at 0x7fbf43633200>, <_sre.SRE_Pattern object at 0x7fbf43633690>, <_sre.SRE_Pattern object at 0x7fbf43633e90>], …}

>>> strutils._SANITIZE_PATTERNS_2.keys()

['admin_pass', 'auth_password', 'adminPass', 'auth_token', 'secret_uuid', 'admin_password', 'new_pass', 'password']

>>> strutils._SANITIZE_PATTERNS_2['admin_pass']

[<_sre.SRE_Pattern object at 0x7fbf4361d9b0>, …]

# (elapsed seconds, pattern source) for each two-group 'admin_pass' pattern.
times = []

for pattern in strutils._SANITIZE_PATTERNS_2['admin_pass']:
    print('\ntesting %s' % pattern.pattern)
    # The pattern is rebuilt from its source text inside the timer's setup
    # code, so only the re.sub() call itself is timed.
    t = timeit.Timer(
        r"re.sub(pattern, r'\g<1>***\g<2>', payload)",
        """import re
payload = '''%s'''
pattern = re.compile(r'''%s''')
""" % (input_str, pattern.pattern))
    result = t.timeit(1)
    print(result)
    times.append((result, pattern.pattern))

print('\nslowest to fastest:')
times = reversed(sorted(times))
for t in times:
    print('%s - %s ' % t)

# Finally time the public entry point over the whole payload.
print('\ntesting all patterns')
t = timeit.Timer(
    "strutils.mask_password('''" + input_str + "''')",
    "import strutils",
)
print(t.timeit(1))

$ python ./perf_test_mask_password.py
payload has 50000 bytes

testing (admin_pass\s*[=]\s*[\"\'])[^\"\']*([\"\'])
0.000396966934204

testing (admin_pass\s+[\"\'])[^\"\']*([\"\'])
0.000360012054443

testing ([-]{2}admin_pass\s+)[^\'^\"^=^\s]+([\s]*)
0.00097393989563

testing (<admin_pass>)[^<]*(</admin_pass>)
0.00052809715271

testing ([\"\']admin_pass[\"\']\s*:\s*[\"\'])[^\"\']*([\"\'])
0.00141596794128

testing ([\'"][^"\']*admin_pass[\'"]\s*:\s*u?[\'"])[^\"\']*(['"])
0.00194191932678

testing ([\'"][^\'"]*admin_pass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"])[^"']*(['"])
0.00175809860229

testing (admin_pass\s*--?[A-z]+\s*)\S+(\s*)
0.000356912612915

slowest to fastest:
0.00194191932678 - ([\'"][^"\']*admin_pass[\'"]\s*:\s*u?[\'"])[^\"\']*(['"])
0.00175809860229 - ([\'"][^\'"]*admin_pass[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?['"])[^"']*(['"])
0.00141596794128 - ([\"\']admin_pass[\"\']\s*:\s*[\"\'])[^\"\']*([\"\'])
0.00097393989563 - ([-]{2}admin_pass\s+)[^\'^\"^=^\s]+([\s]*)
0.00052809715271 - (<admin_pass>)[^<]*(</admin_pass>)
0.000396966934204 - (admin_pass\s*[=]\s*[\"\'])[^\"\']*([\"\'])
0.000360012054443 - (admin_pass\s+[\"\'])[^\"\']*([\"\'])
0.000356912612915 - (admin_pass\s*--?[A-z]+\s*)\S+(\s*)

testing all patterns
0.00732088088989

22.28 / 0.0073 = 3052

References

• https://github.com/dhellmann/pyatl-regex-performance

• https://review.openstack.org/#/c/145562/

• http://git.openstack.org/cgit/openstack/oslo.utils/tree/oslo_utils/strutils.py

• https://docs.python.org/2.7/library/re.html

• https://docs.python.org/2/howto/regex.html#greedy-versus-non-greedy

• https://docs.python.org/2.7/library/timeit.html

• Mastering Regular Expressions, Jeffrey E.F. Friedl