>>> from fuzzingbook.Reducer import <identifier>

!python -c 'x = 1 + 2 * 3 / 0'

Traceback (most recent call last):
  File "<string>", line 1, in <module>
    x = 1 + 2 * 3 / 0
            ~~~~~~^~~
ZeroDivisionError: division by zero

class ZeroDivisionRunner(ProgramRunner):
    """Make outcome 'FAIL' if ZeroDivisionError occurs"""

    def run(self, inp: str = "") -> Tuple[subprocess.CompletedProcess, Outcome]:
        process, outcome = super().run(inp)
        if process.stderr.find('ZeroDivisionError') >= 0:
            outcome = 'FAIL'
        return process, outcome

python_input = "x = 1 + 2 * 3 / 0"
python_runner = ZeroDivisionRunner("python")
process, outcome = python_runner.run(python_input)
outcome

'FAIL'

dd = DeltaDebuggingReducer(python_runner)
dd.reduce(python_input)

'3/0'

# ignore
from ClassDiagram import display_class_hierarchy

# ignore
display_class_hierarchy([DeltaDebuggingReducer, GrammarReducer],
                        public_methods=[
                            Reducer.__init__,
                            Reducer.reset,
                            Reducer.reduce,
                            DeltaDebuggingReducer.reduce,
                            GrammarReducer.__init__,
                            GrammarReducer.reduce,
                        ],
                        types={
                            'DerivationTree': DerivationTree,
                            'Grammar': Grammar,
                            'Outcome': Outcome,
                        },
                        project='fuzzingbook')

class MysteryRunner(Runner):
    def run(self, inp: str) -> Tuple[str, Outcome]:
        x = inp.find(chr(0o17 + 0o31))
        y = inp.find(chr(0o27 + 0o22))
        if x >= 0 and y >= 0 and x < y:
            return (inp, Runner.FAIL)
        else:
            return (inp, Runner.PASS)

mystery = MysteryRunner()
random_fuzzer = RandomFuzzer()
while True:
    inp = random_fuzzer.fuzz()
    result, outcome = mystery.run(inp)
    if outcome == mystery.FAIL:
        break

failing_input = result
failing_input

' 7:,>((/$$-/->.;.=;(.%!:50#7*8=$&&=$9!%6(4=&69\':\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#'

failing_input

' 7:,>((/$$-/->.;.=;(.%!:50#7*8=$&&=$9!%6(4=&69\':\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#'

half_length = len(failing_input) // 2   # // is integer division
first_half = failing_input[:half_length]
mystery.run(first_half)

(" 7:,>((/$$-/->.;.=;(.%!:50#7*8=$&&=$9!%6(4=&69':", 'PASS')

second_half = failing_input[half_length:]
mystery.run(second_half)

('\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#', 'PASS')

quarter_length = len(failing_input) // 4
input_without_first_quarter = failing_input[quarter_length:]
mystery.run(input_without_first_quarter)

('50#7*8=$&&=$9!%6(4=&69\':\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#',
 'FAIL')

input_without_first_and_second_quarter = failing_input[quarter_length * 2:]
mystery.run(input_without_first_and_second_quarter)

('\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#', 'PASS')

second_half

'\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#'

input_without_first_and_second_quarter

'\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#'

input_without_first_and_third_quarter = failing_input[quarter_length:
                                                      quarter_length * 2] + failing_input[quarter_length * 3:]
mystery.run(input_without_first_and_third_quarter)

("50#7*8=$&&=$9!%6(4=&69':<7+1<2!4$>92+$1<(3%&5''>#", 'PASS')

input_without_first_and_fourth_quarter = failing_input[quarter_length:quarter_length * 3]
mystery.run(input_without_first_and_fourth_quarter)

('50#7*8=$&&=$9!%6(4=&69\':\'<3+0-3.24#7=!&60)2/+";+', 'FAIL')

class Reducer:
    """Base class for reducers."""

    def __init__(self, runner: Runner, log_test: bool = False) -> None:
        """Attach reducer to the given `runner`"""
        self.runner = runner
        self.log_test = log_test
        self.reset()

    def reset(self) -> None:
        """Reset the test counter to zero. To be extended in subclasses."""
        self.tests = 0

    def test(self, inp: str) -> Outcome:
        """Test with input `inp`. Return outcome.
        To be extended in subclasses."""

        result, outcome = self.runner.run(inp)
        self.tests += 1
        if self.log_test:
            print("Test #%d" % self.tests, repr(inp), repr(len(inp)), outcome)
        return outcome

    def reduce(self, inp: str) -> str:
        """Reduce input `inp`. Return reduced input.
        To be defined in subclasses."""

        self.reset()
        # Default: Don't reduce
        return inp

class CachingReducer(Reducer):
    """A reducer that also caches test outcomes"""

    def reset(self):
        super().reset()
        self.cache = {}

    def test(self, inp):
        if inp in self.cache:
            return self.cache[inp]

        outcome = super().test(inp)
        self.cache[inp] = outcome
        return outcome

class DeltaDebuggingReducer(CachingReducer):
    """Reduce inputs using delta debugging."""

    def reduce(self, inp: str) -> str:
        """Reduce input `inp` using delta debugging. Return reduced input."""

        self.reset()
        assert self.test(inp) != Runner.PASS

        n = 2     # Initial granularity
        while len(inp) >= 2:
            start = 0.0
            subset_length = len(inp) / n
            some_complement_is_failing = False

            while start < len(inp):
                complement = inp[:int(start)] + \
                    inp[int(start + subset_length):]

                if self.test(complement) == Runner.FAIL:
                    inp = complement
                    n = max(n - 1, 2)
                    some_complement_is_failing = True
                    break

                start += subset_length

            if not some_complement_is_failing:
                if n == len(inp):
                    break
                n = min(n * 2, len(inp))

        return inp

dd_reducer = DeltaDebuggingReducer(mystery, log_test=True)
dd_reducer.reduce(failing_input)

Test #1 ' 7:,>((/$$-/->.;.=;(.%!:50#7*8=$&&=$9!%6(4=&69\':\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#' 97 FAIL
Test #2 '\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#' 49 PASS
Test #3 " 7:,>((/$$-/->.;.=;(.%!:50#7*8=$&&=$9!%6(4=&69':" 48 PASS
Test #4 '50#7*8=$&&=$9!%6(4=&69\':\'<3+0-3.24#7=!&60)2/+";+<7+1<2!4$>92+$1<(3%&5\'\'>#' 73 FAIL
Test #5 "50#7*8=$&&=$9!%6(4=&69':<7+1<2!4$>92+$1<(3%&5''>#" 49 PASS
Test #6 '50#7*8=$&&=$9!%6(4=&69\':\'<3+0-3.24#7=!&60)2/+";+' 48 FAIL
Test #7 '\'<3+0-3.24#7=!&60)2/+";+' 24 PASS
Test #8 "50#7*8=$&&=$9!%6(4=&69':" 24 PASS
Test #9 '9!%6(4=&69\':\'<3+0-3.24#7=!&60)2/+";+' 36 FAIL
Test #10 '9!%6(4=&69\':=!&60)2/+";+' 24 FAIL
Test #11 '=!&60)2/+";+' 12 PASS
Test #12 "9!%6(4=&69':" 12 PASS
Test #13 '=&69\':=!&60)2/+";+' 18 PASS
Test #14 '9!%6(4=!&60)2/+";+' 18 FAIL
Test #15 '9!%6(42/+";+' 12 PASS
Test #16 '9!%6(4=!&60)' 12 FAIL
Test #17 '=!&60)' 6 PASS
Test #18 '9!%6(4' 6 PASS
Test #19 '6(4=!&60)' 9 FAIL
Test #20 '6(460)' 6 FAIL
Test #21 '60)' 3 PASS
Test #22 '6(4' 3 PASS
Test #23 '(460)' 5 FAIL
Test #24 '460)' 4 PASS
Test #25 '(0)' 3 FAIL
Test #26 '0)' 2 PASS
Test #27 '(' 1 PASS
Test #28 '()' 2 FAIL
Test #29 ')' 1 PASS

'()'

quiz("What happens if the function under test does not fail?",
    [
        "Delta debugging searches for the minimal input"
        " that produces the same result",
        "Delta debugging starts a fuzzer to find a failure",
        "Delta debugging raises an AssertionError",
        "Delta debugging runs forever in a loop",
    ], '0 ** 0 + 1 ** 0 + 0 ** 1 + 1 ** 1')

with ExpectError():
    dd_reducer.reduce("I am a passing input")

Test #1 'I am a passing input' 20 PASS

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_23873/3080932113.py", line 2, in <module>
    dd_reducer.reduce("I am a passing input")
    ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_23873/2870451724.py", line 8, in reduce
    assert self.test(inp) != Runner.PASS
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError (expected)

expr_input = "1 + (2 * 3)"
dd_reducer = DeltaDebuggingReducer(mystery, log_test=True)
dd_reducer.reduce(expr_input)

Test #1 '1 + (2 * 3)' 11 FAIL
Test #2 '2 * 3)' 6 PASS
Test #3 '1 + (' 5 PASS
Test #4 '+ (2 * 3)' 9 FAIL
Test #5 '+ ( 3)' 6 FAIL
Test #6 ' 3)' 3 PASS
Test #7 '+ (' 3 PASS
Test #8 ' ( 3)' 5 FAIL
Test #9 '( 3)' 4 FAIL
Test #10 '3)' 2 PASS
Test #11 '( ' 2 PASS
Test #12 '(3)' 3 FAIL
Test #13 '()' 2 FAIL
Test #14 ')' 1 PASS
Test #15 '(' 1 PASS

'()'

class EvalMysteryRunner(MysteryRunner):
    def __init__(self) -> None:
        self.parser = EarleyParser(EXPR_GRAMMAR)

    def run(self, inp: str) -> Tuple[str, Outcome]:
        try:
            tree, *_ = self.parser.parse(inp)
        except SyntaxError:
            return (inp, Runner.UNRESOLVED)

        return super().run(inp)

eval_mystery = EvalMysteryRunner()

dd_reducer = DeltaDebuggingReducer(eval_mystery, log_test=True)
dd_reducer.reduce(expr_input)

Test #1 '1 + (2 * 3)' 11 FAIL
Test #2 '2 * 3)' 6 UNRESOLVED
Test #3 '1 + (' 5 UNRESOLVED
Test #4 '+ (2 * 3)' 9 UNRESOLVED
Test #5 '1 2 * 3)' 8 UNRESOLVED
Test #6 '1 + ( 3)' 8 UNRESOLVED
Test #7 '1 + (2 *' 8 UNRESOLVED
Test #8 ' + (2 * 3)' 10 UNRESOLVED
Test #9 '1+ (2 * 3)' 10 UNRESOLVED
Test #10 '1 (2 * 3)' 9 UNRESOLVED
Test #11 '1 + 2 * 3)' 10 UNRESOLVED
Test #12 '1 + ( * 3)' 10 UNRESOLVED
Test #13 '1 + (2 3)' 9 UNRESOLVED
Test #14 '1 + (2 *3)' 10 UNRESOLVED
Test #15 '1 + (2 * ' 9 UNRESOLVED
Test #16 '1  (2 * 3)' 10 UNRESOLVED
Test #17 '1 +(2 * 3)' 10 UNRESOLVED
Test #18 '1 + (2* 3)' 10 UNRESOLVED
Test #19 '1 + (2  3)' 10 UNRESOLVED
Test #20 '1 + (2 * )' 10 UNRESOLVED
Test #21 '1 + (2 * 3' 10 UNRESOLVED

'1 + (2 * 3)'

derivation_tree, *_ = EarleyParser(EXPR_GRAMMAR).parse(expr_input)
display_tree(derivation_tree)

new_derivation_tree = copy.deepcopy(derivation_tree)
# We really should have some query language
sub_expr_tree = new_derivation_tree[1][0][1][2]
display_tree(sub_expr_tree)

new_derivation_tree[1][0] = sub_expr_tree
display_tree(new_derivation_tree)

all_terminals(new_derivation_tree)

'(2 * 3)'

term_tree = new_derivation_tree[1][0][1][0][1][0][1][1][1][0]
display_tree(term_tree)

shorter_term_tree = term_tree[1][2]
display_tree(shorter_term_tree)

new_derivation_tree[1][0][1][0][1][0][1][1][1][0] = shorter_term_tree
display_tree(new_derivation_tree)

all_terminals(new_derivation_tree)

'(3)'

expr_input

'1 + (2 * 3)'

grammar_reducer = GrammarReducer(
    eval_mystery,
    EarleyParser(EXPR_GRAMMAR),
    log_test=True)
grammar_reducer.reduce(expr_input)

Test #1 '(2 * 3)' 7 FAIL
Test #2 '2 * 3' 5 PASS
Test #3 '3' 1 PASS
Test #4 '2' 1 PASS
Test #5 '(3)' 3 FAIL

'(3)'

grammar_reducer = GrammarReducer(
    mystery,
    EarleyParser(EXPR_GRAMMAR),
    log_reduce=True)

all_terminals(derivation_tree)

'1 + (2 * 3)'

display_tree(derivation_tree)

[all_terminals(t) for t in grammar_reducer.subtrees_with_symbol(
    derivation_tree, "<term>", depth=1)]

[]

[all_terminals(t) for t in grammar_reducer.subtrees_with_symbol(
    derivation_tree, "<term>", depth=2)]

['1']

[all_terminals(t) for t in grammar_reducer.subtrees_with_symbol(
    derivation_tree, "<term>", depth=3)]

['(2 * 3)']

class GrammarReducer(GrammarReducer):
    def reduce_tree(self, tree):
        depth = 0
        while depth < max_height(tree):
            reduced = self.reduce_subtree(tree, tree, depth)
            if reduced:
                depth = 0    # Start with new tree
            else:
                depth += 1   # Extend search for subtrees
        return tree

grammar_reducer = GrammarReducer(
    mystery,
    EarleyParser(EXPR_GRAMMAR),
    log_test=True)
grammar_reducer.reduce(expr_input)

Test #1 '(2 * 3)' 7 FAIL
Test #2 '(3)' 3 FAIL
Test #3 '3' 1 PASS

'(3)'

long_expr_input = GrammarFuzzer(EXPR_GRAMMAR, min_nonterminals=100).fuzz()
long_expr_input

'++---((-2 / 3 / 3 - -+1 / 5 - 2) * ++6 / +8 * 4 / 9 / 2 * 8 + ++(5) * 3 / 8 * 0 + 3 * 3 + 4 / 0 / 6 + 9) * ++++(+--9 * -3 * 7 / 4 + --(4) / 3 - 0 / 3 + 5 + 0) * (1 * 6 - 1 / 9 * 5 - 9 / 0 + 7) * ++(8 - 1) * +1 * 7 * 0 + ((1 + 4) / 4 * 8 * 9 * 4 + 4 / (4) * 1 - (4) * 8 * 5 + 1 + 4) / (+(2 - 1 - 9) * 5 + 3 + 6 - 2) * +3 * (3 - 7 + 8) / 4 - -(9 * 4 - 1 * 0 + 5) / (5 / 9 * 5 + 2) * 7 + ((7 - 5 + 3) / 1 * 8 - 8 - 9) * --+1 * 4 / 4 - 4 / 7 * 4 - 3 / 6 * 1 - 2 - 7 - 8'

grammar_reducer = GrammarReducer(eval_mystery, EarleyParser(EXPR_GRAMMAR))
with Timer() as grammar_time:
    print(grammar_reducer.reduce(long_expr_input))

(9)

grammar_reducer.tests

10

grammar_time.elapsed_time()

0.0488017080060672

dd_reducer = DeltaDebuggingReducer(eval_mystery)
with Timer() as dd_time:
    print(dd_reducer.reduce(long_expr_input))

((2 - 1 - 2) * 8 + (5) - (4)) / ((2) * 3) * (9) / 3 / 1 - 8

dd_reducer.tests

900

dd_time.elapsed_time()

1.2860496250214055

with DeltaDebugger() as dd:
    fun(args...)
dd

Reducing Failure-Inducing Inputs¶

Synopsis¶

Why Reducing?¶

Manual Input Reduction¶

Delta Debugging¶

Quiz

Grammar-Based Input Reduction¶

Lexical Reduction vs. Syntactic Rules¶

A Grammar-Based Reduction Approach¶

Simplifying by Replacing Subtrees¶

Simplifying by Alternative Expansions¶

A Depth-Oriented Strategy¶

Comparing Strategies¶

Lessons Learned¶

Next Steps¶

Background¶

Exercises¶

Exercise 1: Mutation-Based Fuzzing with Reduction¶

Exercise 2: Reduction by Production¶

Exercise 3: The Big Reduction Shoot-Out¶