Sunday, June 21, 2015

Parse C source code from Python and print function declarations and function calls

Complete source code: pylangparser
Source code to be parsed:
# C source text used as the parser input for this example, kept in a raw
# triple-quoted string. It contains struct/union/typedef/enum declarations,
# a function prototype (`func`) and two function definitions (`func2`,
# `gst_flow_get_name`) whose bodies include the call expressions that the
# script searches for.
# NOTE(review): the C code appears to have lost its original line breaks;
# the literal is left exactly as found because it is runtime data.
source = r""" #include <stdio.h> struct struct_name { signed short int *p; char p; int * t; int p[5][5]; /* comment */ }; union union_name { signed short int *p; char p; int * t; int p[5][5]; /* comment */ }; typedef unsigned char BYTE; unsigned int p, c; enum some_name{p = 1, q}; char * func(int p, char t);
int func2(const int p, char t) { int l, q; char *f; unsigned short j; q = 5; func(12, func1(42), 42); printf ("hello world: %" GST_TIME_FORMAT, time); best = (GstMatroskaPad *) data; gst_ebml_write_set_cache (ebml, 0x40); if (mux->doctype_version > 1 && !write_duration) { if (is_video_keyframe) flags += 0x80; } if (!(a>b) && !c) f(12, a); { { q = 1; } q = 5; return f; } if (5 == 6) { } else { p = 1; } while (5 == 6) { p = 1; } do { p = 1; } while (5 == 6); for (;;) { p = 1; break; } for (i = 5; i < 5; i++) { p = 1; if (i == 4) { abort (1); break; } } switch (i) { case 5: { break; } default: { break; } } switch (i) { case 5: break; default: break; } if (p == 5) p = 5; else goto error; /* * this is a multi-line comment */ return (p == 5); error: { if (p == 5) p = 5.5; return 1; } } const gchar * gst_flow_get_name (GstFlowReturn ret) { gint i; ret = CLAMP (ret, GST_FLOW_CUSTOM_ERROR, GST_FLOW_CUSTOM_SUCCESS); ret = f(a, b); for (i = 0; i < G_N_ELEMENTS (flow_quarks); i++) { p = flow_quarks[i].ret; if (ret == flow_quarks[i].ret) return flow_quarks[i].name; } return "unknown"; } """
The Python code that walks the parse result and prints the declarations and calls:

 # Run the top-level `translation_unit` parser over `tokens`. The second
 # argument (0) is presumably the starting token index -- TODO confirm.
 # `result` is the group collection that the rest of the script iterates.
 result = translation_unit(tokens, 0)

#
# print all function declarations
#
print("\n--------------function declarations--------------")
for group in result:
    if group.check_parser(function_declaration):
        group.pretty_print()


def perform_call_search(group):
    """Recursively print every call expression found inside *group*.

    Each sub-group is searched before it is examined itself, so a nested
    call is printed before the call that contains it. For every call
    expression the group is pretty-printed, followed by the function name
    token and one line per item yielded by iterating the argument group.

    Raises:
        TypeError: if sub-group 1 of a call expression is not an
            IDENTIFIER produced by a SymbolsParser, or if sub-group 2 is
            not an arglist.
    """
    for sub_group in group:
        # perform deep search
        perform_call_search(sub_group)

        if sub_group.check_parser(call_expression):
            # current sub_group is a call expression, print it
            sub_group.pretty_print()

            func_name = sub_group.get_sub_group(1)
            # func name must fulfill SymbolsParser && IDENTIFIER
            if not (func_name.check_parser_instance(SymbolsParser)
                    and func_name.check_parser(IDENTIFIER)):
                raise TypeError("internal error, func_name not IDENTIFIER")
            func_name_token = func_name.get_token()
            print("func name: %s" % func_name_token)

            func_args = sub_group.get_sub_group(2)
            if not func_args.check_parser(arglist):
                raise TypeError("internal error, func_args not arglist")
            for arg in func_args:
                print("arg: %s" % arg)


#
# print all function calls within each function
#
print("\n--------------function calls--------------")
for group in result:
    if group.check_parser(function_definition):
        print("\nfound function definition, all function calls within "
              "its body:")
        perform_call_search(group)
Output after running the example script in the package:

--------------function declarations--------------
[['char'], [['*'], [['func'], [[['int'], ['p']], [['char'], ['t']]]]]]

--------------function calls--------------

found function definition, all function calls within its body:
[['func1'], ['42']]
func name: func1
[['func'], [['12'], [[['func1'], ['42']], ['42']]]]
func name: func
arg: (12, instance: (0x[0-9A-Fa-f]*|\d+))
arg: (((func1, instance: [A-Za-z_]+[A-Za-z0-9_]*), (42, instance: (0x[0-9A-Fa-f]*|\d+))), (42, instance: (0x[0-9A-Fa-f]*|\d+)))
[['printf'], [[['"hello world: %"'], ['GST_TIME_FORMAT']], ['time']]]
func name: printf
arg: (("hello world: %", instance: \".*\"), (GST_TIME_FORMAT, instance: [A-Za-z_]+[A-Za-z0-9_]*))
arg: (time, instance: [A-Za-z_]+[A-Za-z0-9_]*)
[['gst_ebml_write_set_cache'], [['ebml'], ['0x40']]]
func name: gst_ebml_write_set_cache
arg: (ebml, instance: [A-Za-z_]+[A-Za-z0-9_]*)
arg: (0x40, instance: (0x[0-9A-Fa-f]*|\d+))
[['f'], [['12'], ['a']]]
func name: f
arg: (12, instance: (0x[0-9A-Fa-f]*|\d+))
arg: (a, instance: [A-Za-z_]+[A-Za-z0-9_]*)
[['abort'], ['1']]
func name: abort

found function definition, all function calls within its body:
[['CLAMP'],
 [['ret'], [['GST_FLOW_CUSTOM_ERROR'], ['GST_FLOW_CUSTOM_SUCCESS']]]]
func name: CLAMP
arg: (ret, instance: [A-Za-z_]+[A-Za-z0-9_]*)
arg: ((GST_FLOW_CUSTOM_ERROR, instance: [A-Za-z_]+[A-Za-z0-9_]*), (GST_FLOW_CUSTOM_SUCCESS, instance: [A-Za-z_]+[A-Za-z0-9_]*))
[['f'], [['a'], ['b']]]
func name: f
arg: (a, instance: [A-Za-z_]+[A-Za-z0-9_]*)
arg: (b, instance: [A-Za-z_]+[A-Za-z0-9_]*)


No comments:

Post a Comment