libpypa: assertion failure in make_string

git log

commit e4b3c359d3d2a9ccbea1cbb0a977af8219dd64ef
Author: Kai Mast <[email protected]>
Date:   Wed Mar 7 18:24:57 2018 -0500

    More generic reader interface

error

./parser-test ./libpypa_assert_error_in_make_string.py

parser-test: pypa/parser/make_string.cc:70: pypa::String pypa::make_string(const String&, bool&, bool&, bool): Assertion `string_end != String::npos && string_start <= string_end' failed.
Aborted (core dumped)

testcase: https://github.com/xcainiao/poc/blob/master/libpypa_assert_error_in_make_string.py

analysis

gdb log

Legend: code, data, rodata, value
70	    assert(string_end != String::npos && string_start <= string_end);
gdb-peda$ print string_start
$1 = 0x5
gdb-peda$ print string_end
$2 = 0x4
gdb-peda$ bt
#0  pypa::make_string (input="'''''a'''", unicode=@0x...: 0x0, raw=@0x...: 0x0, ignore_escaping=0x0) at pypa/parser/make_string.cc:70
#1  0x00000000004cd40f in pypa::atom (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:841
#2  0x00000000004d277b in pypa::power (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:1355
#3  0x00000000004d05d3 in pypa::factor (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:1173
#4  0x00000000004d8fa6 in pypa::term (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:2006
#5  0x00000000004ddf1c in pypa::arith_expr (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:2529
#6  0x00000000004cea4f in pypa::shift_expr (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:996
#7  0x00000000004e215a in pypa::generic_binop_expr<bool (*)(pypa::(anonymous namespace)::State&, std::shared_ptr<pypa::AstExpression>&)> (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0, 
    op=pypa::TokenKind::BinAnd, op_type=pypa::AstBinOpType::BitAnd, fun=0x4ce95c <pypa::shift_expr(pypa::(anonymous namespace)::State&, pypa::AstExpr&)>) at pypa/parser/parser.cc:190
#8  0x00000000004d2363 in pypa::and_expr (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:1326
#9  0x00000000004e215a in pypa::generic_binop_expr<bool (*)(pypa::(anonymous namespace)::State&, std::shared_ptr<pypa::AstExpression>&)> (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0, 
    op=pypa::TokenKind::CircumFlex, op_type=pypa::AstBinOpType::BitXor, fun=0x4d2333 <pypa::and_expr(pypa::(anonymous namespace)::State&, pypa::AstExpr&)>) at pypa/parser/parser.cc:190
#10 0x00000000004d96ef in pypa::xor_expr (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:2046
#11 0x00000000004e215a in pypa::generic_binop_expr<bool (*)(pypa::(anonymous namespace)::State&, std::shared_ptr<pypa::AstExpression>&)> (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0, 
    op=pypa::TokenKind::BinOr, op_type=pypa::AstBinOpType::BitOr, fun=0x4d96bf <pypa::xor_expr(pypa::(anonymous namespace)::State&, pypa::AstExpr&)>) at pypa/parser/parser.cc:190
#12 0x00000000004da227 in pypa::expr (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:2145
#13 0x00000000004d8acb in pypa::comparison (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:1980
#14 0x00000000004c9fa1 in pypa::not_test (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:461
#15 0x00000000004e2793 in pypa::generic_boolop_expr<bool (*)(pypa::(anonymous namespace)::State&, std::shared_ptr<pypa::AstExpression>&)> (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0, 
    op=pypa::Token::KeywordAnd, op_type=pypa::AstBoolOpType::And, fun=0x4c9d9f <pypa::not_test(pypa::(anonymous namespace)::State&, pypa::AstExpr&)>) at pypa/parser/parser.cc:209
#16 0x00000000004d4d79 in pypa::and_test (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:1613
#17 0x00000000004e2793 in pypa::generic_boolop_expr<bool (*)(pypa::(anonymous namespace)::State&, std::shared_ptr<pypa::AstExpression>&)> (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0, 
    op=pypa::Token::KeywordOr, op_type=pypa::AstBoolOpType::Or, fun=0x4d4d49 <pypa::and_test(pypa::(anonymous namespace)::State&, pypa::AstExpr&)>) at pypa/parser/parser.cc:209
#18 0x00000000004d9721 in pypa::or_test (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:2050
#19 0x00000000004d089a in pypa::test (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60700000dee0) at pypa/parser/parser.cc:1179
#20 0x00000000004d36e2 in pypa::testlist (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60600000eaf0) at pypa/parser/parser.cc:1462
#21 0x00000000004d6732 in pypa::expr_stmt (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60400000dfe0) at pypa/parser/parser.cc:1730
#22 0x00000000004c960e in pypa::small_stmt (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60400000dfe0) at pypa/parser/parser.cc:394
#23 0x00000000004cf324 in pypa::simple_stmt (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60600000eb50) at pypa/parser/parser.cc:1045
#24 0x00000000004d3e48 in pypa::stmt (s=..., ast=std::shared_ptr (count 2, weak 0) 0x60600000eb50) at pypa/parser/parser.cc:1516
#25 0x00000000004dfb61 in pypa::file_input (s=..., ast=std::shared_ptr (count 1, weak 0) 0x60600000ec10) at pypa/parser/parser.cc:2728
#26 0x00000000004e0a93 in pypa::parse (lexer=..., ast=std::shared_ptr (count 1, weak 0) 0x60600000ec10, symbols=std::shared_ptr (empty) 0x0, options=...) at pypa/parser/parser.cc:2781
#27 0x00000000004af15f in main (argc=0x2, argv=0x7fffffffdf28) at pypa/parser/test.cc:35
#28 0x00007ffff6aeb830 in __libc_start_main (main=0x4aef36 <main(int, char const**)>, argc=0x2, argv=0x7fffffffdf28, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, 
    stack_end=0x7fffffffdf18) at ../csu/libc-start.c:291
#29 0x0000000000407de9 in _start ()

fuzz

libFuzzer harness

#include <cstdio>

#include <pypa/parser/parser.hh>

// Forward declaration of pypa::dump, which the fuzz entry point in this file
// calls on the AST after a successful parse. No definition is visible here;
// presumably it is provided by the libpypa sources the harness is linked
// against -- TODO confirm.
namespace pypa {
    void dump(AstPtr);
}

//int main(int argc, char const ** argv)
// Scratch file the fuzz input is staged in before it is handed to the lexer.
#define filename  "./xxxx.py"

/**
 * Fuzz entry point: writes the input bytes to the scratch file `filename`,
 * then lexes and parses that file with pypa, dumping the AST on success.
 *
 * @param data  Pointer to the fuzz input bytes.
 * @param size  Number of bytes available at `data`.
 * @return Always 0.
 *
 * NOTE(review): libFuzzer documents the target signature as
 * `(const uint8_t *Data, size_t Size)`. The signature is kept as it was to
 * avoid changing the harness interface; consider aligning it.
 */
extern "C" int LLVMFuzzerTestOneInput(char *data, int size)
{
    // Binary mode: the input is arbitrary bytes and must reach the lexer
    // unmodified on every platform.
    FILE* temfile = fopen(filename, "wb");
    if (temfile == nullptr) {
        // Could not create the scratch file; skip this input instead of
        // handing a null stream to fwrite/fclose.
        return 0;
    }

    const std::size_t expected = size > 0 ? static_cast<std::size_t>(size) : 0;
    const std::size_t written = fwrite(data, 1, expected, temfile);
    const bool closed_ok = (fclose(temfile) == 0);
    if (written != expected || !closed_ok) {
        // The file does not hold the complete input, so parsing it would
        // exercise something other than what the fuzzer generated.
        return 0;
    }

    pypa::AstModulePtr ast;
    pypa::SymbolTablePtr symbols;
    pypa::ParserOptions options;
    // options.python3allowed = true;
    options.printerrors = true;
    options.printdbgerrors = true;
    pypa::Lexer lexer(filename);
    if (pypa::parse(lexer, ast, symbols, options)) {
        printf("Parsing successfull\n");
        dump(ast);
    }
    else {
        fprintf(stderr, "Parsing failed\n");
    }
    return 0;
}