From: Ebersold Date: Mon, 20 Dec 2021 21:26:36 +0000 (+0100) Subject: Initial Import from SVN X-Git-Url: https://git.ebersold.fr/?a=commitdiff_plain;h=refs%2Fheads%2Fmaster;p=parser%2Fidl.git Initial Import from SVN --- bf13cda036cb0d79df31d4a0b41c6ba97e9d6116 diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..e9b7512 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,3 @@ +PROJECT(idl) + + diff --git a/idl.lex b/idl.lex new file mode 100644 index 0000000..6979fff --- /dev/null +++ b/idl.lex @@ -0,0 +1,174 @@ +/* + * MICO --- a free CORBA implementation + * Copyright (C) 1997-98 Kay Roemer & Arno Puder + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send comments and/or bug reports to: + * mico@informatik.uni-frankfurt.de + */ + +%{ + + +%} + +%option noyywrap + +/*--------------------------------------------------------------------------*/ + +Digits [0-9]+ +Oct_Digit [0-7] +Hex_Digit [a-fA-F0-9] +Int_Literal [1-9][0-9]* +Oct_Literal 0{Oct_Digit}* +Hex_Literal (0x|0X){Hex_Digit}* +Esc_Sequence1 "\\"[ntvbrfa\\\?\'\"] +Esc_Sequence2 "\\"{Oct_Digit}{1,3} +Esc_Sequence3 "\\"(x|X){Hex_Digit}{1,2} +Esc_Sequence ({Esc_Sequence1}|{Esc_Sequence2}|{Esc_Sequence3}) +Char ([^\n\t\"\'\\]|{Esc_Sequence}) +Char_Literal "'"({Char}|\")"'" +String_Literal \"({Char}|"'")*\" +Float_Literal1 {Digits}"."{Digits}?(e|E)("+"|"-")?{Digits} +Float_Literal2 {Digits}(e|E)("+"|"-")?{Digits} +Float_Literal3 {Digits}"."{Digits} +Float_Literal4 {Digits}"." 
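+/* Descriptive note: Float_Literal1-4 above are meant to cover forms such as
+   "1.5e-3", "2E10", "3.14" and "42."; the leading-dot forms (".5", ".5e2")
+   and the fixed-point literals with a d/D suffix follow below. */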
+Float_Literal5 "."{Digits} +Float_Literal6 "."{Digits}(e|E)("+"|"-")?{Digits} +Fixed_Literal1 {Digits}(d|D) +Fixed_Literal2 {Digits}"."(d|D) +Fixed_Literal3 "."{Digits}(d|D) +Fixed_Literal4 {Digits}"."{Digits}(d|D) + +/*--------------------------------------------------------------------------*/ + +CORBA_Identifier [a-zA-Z_][a-zA-Z0-9_]* + +/*--------------------------------------------------------------------------*/ + + + +%% + +[ \t] ; +[\n] ; +"//"[^\n]* ; +"#pragma"[^\n]*\n { + return T_PRAGMA; + } +"#"[^\n]*\n { + preprocessor_directive( yytext ); + } +"{" return T_LEFT_CURLY_BRACKET; +"}" return T_RIGHT_CURLY_BRACKET; +"[" return T_LEFT_SQUARE_BRACKET; +"]" return T_RIGHT_SQUARE_BRACKET; +"(" return T_LEFT_PARANTHESIS; +")" return T_RIGHT_PARANTHESIS; +":" return T_COLON; +"," return T_COMMA; +";" return T_SEMICOLON; +"=" return T_EQUAL; +">>" return T_SHIFTRIGHT; +"<<" return T_SHIFTLEFT; +"+" return T_PLUS_SIGN; +"-" return T_MINUS_SIGN; +"*" return T_ASTERISK; +"/" return T_SOLIDUS; +"%" return T_PERCENT_SIGN; +"~" return T_TILDE; +"|" return T_VERTICAL_LINE; +"^" return T_CIRCUMFLEX; +"&" return T_AMPERSAND; +"<" return T_LESS_THAN_SIGN; +">" return T_GREATER_THAN_SIGN; + +const return T_CONST; +typedef return T_TYPEDEF; +float return T_FLOAT; +double return T_DOUBLE; +char return T_CHAR; +wchar return T_WCHAR; +fixed return T_FIXED; +boolean return T_BOOLEAN; +string return T_STRING; +wstring return T_WSTRING; +void return T_VOID; +unsigned return T_UNSIGNED; +long return T_LONG; +short return T_SHORT; +FALSE return T_FALSE; +TRUE return T_TRUE; +struct return T_STRUCT; +union return T_UNION; +switch return T_SWITCH; +case return T_CASE; +default return T_DEFAULT; +enum return T_ENUM; +in return T_IN; +out return T_OUT; +interface return T_INTERFACE; +abstract return T_ABSTRACT; +valuetype return T_VALUETYPE; +truncatable return T_TRUNCATABLE; +supports return T_SUPPORTS; +custom return T_CUSTOM; +public return T_PUBLIC; +private return T_PRIVATE; +factory return T_FACTORY; +native return T_NATIVE; +ValueBase return T_VALUEBASE; + +"::" return T_SCOPE; + +module return T_MODULE; +octet return T_OCTET; +any return T_ANY; +sequence return T_SEQUENCE; +readonly return T_READONLY; +attribute return T_ATTRIBUTE; +exception return T_EXCEPTION; +oneway return T_ONEWAY; +inout return T_INOUT; +raises return T_RAISES; +context return T_CONTEXT; + +Object return T_OBJECT; +Principal return T_PRINCIPAL; + + +{CORBA_Identifier} return T_IDENTIFIER; +{Float_Literal1} | +{Float_Literal2} | +{Float_Literal3} | +{Float_Literal4} | +{Float_Literal5} | +{Float_Literal6} return T_FLOATING_PT_LITERAL; +{Fixed_Literal1} | +{Fixed_Literal2} | +{Fixed_Literal3} | +{Fixed_Literal4} return T_FIXED_PT_LITERAL; +{Int_Literal} return T_INTEGER_LITERAL; +{Oct_Literal} return T_INTEGER_LITERAL; +{Hex_Literal} return T_INTEGER_LITERAL; +{Char_Literal} return T_CHARACTER_LITERAL; +{String_Literal} return T_STRING_LITERAL; +. { + return T_UNKNOWN; + } + +%% + diff --git a/idl.y b/idl.y new file mode 100644 index 0000000..b1bc36f --- /dev/null +++ b/idl.y @@ -0,0 +1,798 @@ +/* MICO --- a free CORBA implementation + * Copyright (C) 1997-98 Kay Roemer & Arno Puder + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Send comments and/or bug reports to: + * mico@informatik.uni-frankfurt.de + */ + + +%token T_AMPERSAND +%token T_ANY +%token T_ASTERISK +%token T_ATTRIBUTE +%token T_BOOLEAN +%token T_CASE +%token T_CHAR +%token T_CHARACTER_LITERAL +%token T_CIRCUMFLEX +%token T_COLON +%token T_COMMA +%token T_CONST +%token T_CONTEXT +%token T_DEFAULT +%token T_DOUBLE +%token T_ENUM +%token T_EQUAL +%token T_EXCEPTION +%token T_FALSE +%token T_FIXED +%token T_FIXED_PT_LITERAL +%token T_FLOAT +%token T_FLOATING_PT_LITERAL +%token T_GREATER_THAN_SIGN +%token T_IDENTIFIER +%token T_IN +%token T_INOUT +%token T_INTEGER_LITERAL +%token T_INTERFACE +%token T_LEFT_CURLY_BRACKET +%token T_LEFT_PARANTHESIS +%token T_LEFT_SQUARE_BRACKET +%token T_LESS_THAN_SIGN +%token T_LONG +%token T_MINUS_SIGN +%token T_MODULE +%token T_OCTET +%token T_ONEWAY +%token T_OUT +%token T_PERCENT_SIGN +%token T_PLUS_SIGN +%token T_PRINCIPAL +%token T_RAISES +%token T_READONLY +%token T_RIGHT_CURLY_BRACKET +%token T_RIGHT_PARANTHESIS +%token T_RIGHT_SQUARE_BRACKET +%token T_SCOPE +%token T_SEMICOLON +%token T_SEQUENCE +%token T_SHIFTLEFT +%token T_SHIFTRIGHT +%token T_SHORT +%token T_SOLIDUS +%token T_STRING +%token T_STRING_LITERAL +%token T_PRAGMA +%token T_STRUCT +%token T_SWITCH +%token T_TILDE +%token T_TRUE +%token T_OBJECT +%token T_TYPEDEF +%token T_UNION +%token T_UNSIGNED +%token T_VERTICAL_LINE +%token T_VOID +%token T_WCHAR +%token T_WSTRING +%token T_UNKNOWN +%token T_ABSTRACT +%token T_VALUETYPE +%token T_TRUNCATABLE +%token T_SUPPORTS +%token T_CUSTOM +%token T_PUBLIC +%token T_PRIVATE +%token T_FACTORY +%token T_NATIVE +%token T_VALUEBASE + + +%% + +/*1*/ +specification + : /*empty*/ + | definitions + ; + + +definitions + : definition + | definition definitions + ; + +/*2*/ +definition + : type_dcl T_SEMICOLON + | const_dcl T_SEMICOLON + | except_dcl T_SEMICOLON + | interface T_SEMICOLON + | module T_SEMICOLON + | value T_SEMICOLON + ; + +/*3*/ +module + : T_MODULE T_IDENTIFIER T_LEFT_CURLY_BRACKET + definitions T_RIGHT_CURLY_BRACKET + ; + +/*4*/ +interface + : interface_dcl + | forward_dcl + ; + +/*5*/ +interface_dcl + : interface_header T_LEFT_CURLY_BRACKET interface_body + T_RIGHT_CURLY_BRACKET + ; + +/*6*/ +forward_dcl + : T_INTERFACE T_IDENTIFIER + | T_ABSTRACT T_INTERFACE T_IDENTIFIER + ; + +/*7*/ +interface_header + : T_INTERFACE T_IDENTIFIER + | T_INTERFACE T_IDENTIFIER interface_inheritance_spec + | T_ABSTRACT T_INTERFACE T_IDENTIFIER + | T_ABSTRACT T_INTERFACE T_IDENTIFIER interface_inheritance_spec + ; + +/*8*/ +interface_body + : /*empty*/ + | exports + ; + +exports + : export + | export exports + ; + +/*9*/ +export + : type_dcl T_SEMICOLON + | const_dcl T_SEMICOLON + | except_dcl T_SEMICOLON + | attr_dcl T_SEMICOLON + | op_dcl T_SEMICOLON + ; + +/*10*/ +interface_inheritance_spec + : T_COLON interface_names + ; + +interface_names + : scoped_names + ; + +scoped_names + : scoped_name + | scoped_name T_COMMA scoped_names + ; + +/*11*/ +interface_name + : scoped_name + ; + +/*12*/ +scoped_name + : T_IDENTIFIER + | T_SCOPE T_IDENTIFIER + | scoped_name T_SCOPE 
T_IDENTIFIER + ; + +/*13*/ +value + : value_dcl + | value_abs_dcl + | value_box_dcl + | value_forward_dcl + ; + +/*14*/ +value_forward_dcl + : T_VALUETYPE T_IDENTIFIER + | T_ABSTRACT T_VALUETYPE T_IDENTIFIER + ; + +/*15*/ +value_box_dcl + : T_VALUETYPE T_IDENTIFIER type_spec + ; + +/*16*/ +value_abs_dcl + : T_ABSTRACT T_VALUETYPE T_IDENTIFIER + T_LEFT_CURLY_BRACKET value_body T_RIGHT_CURLY_BRACKET + | T_ABSTRACT T_VALUETYPE T_IDENTIFIER value_inheritance_spec + T_LEFT_CURLY_BRACKET value_body T_RIGHT_CURLY_BRACKET + ; + +value_body + : /*empty*/ + | exports + ; + +/*17*/ +value_dcl + : value_header T_LEFT_CURLY_BRACKET value_elements + T_RIGHT_CURLY_BRACKET + | value_header T_LEFT_CURLY_BRACKET T_RIGHT_CURLY_BRACKET + ; + +value_elements + : value_element + | value_element value_elements + ; + +/*18*/ +value_header + : T_VALUETYPE T_IDENTIFIER value_inheritance_spec + | T_CUSTOM T_VALUETYPE T_IDENTIFIER value_inheritance_spec + | T_VALUETYPE T_IDENTIFIER + | T_CUSTOM T_VALUETYPE T_IDENTIFIER + ; + +/*19*/ +value_inheritance_spec + : T_COLON value_inheritance_bases + | T_COLON value_inheritance_bases T_SUPPORTS interface_names + | T_SUPPORTS interface_names + ; + +value_inheritance_bases + : value_name + | value_name T_COMMA value_names + | T_TRUNCATABLE value_name + | T_TRUNCATABLE value_name T_COMMA value_names + ; + +value_names + : scoped_names + ; + +/*20*/ +value_name + : scoped_name + ; + +/*21*/ +value_element + : export + | state_member + | init_dcl + ; + +/*22*/ +state_member + : T_PUBLIC type_spec declarators T_SEMICOLON + | T_PRIVATE type_spec declarators T_SEMICOLON + ; + +/*23*/ +init_dcl + : T_FACTORY T_IDENTIFIER + T_LEFT_PARANTHESIS init_param_decls T_RIGHT_PARANTHESIS + T_SEMICOLON + ; + +/*24*/ +init_param_decls + : init_param_decl + | init_param_decl T_COMMA init_param_decls + ; + +/*25*/ +init_param_decl + : init_param_attribute param_type_spec simple_declarator + ; + +/*26*/ +init_param_attribute + : T_IN + ; + +/*27*/ +const_dcl + : T_CONST const_type T_IDENTIFIER T_EQUAL const_exp + ; + +/*28*/ +const_type + : integer_type + | char_type + | wide_char_type + | boolean_type + | floating_pt_type + | string_type + | wide_string_type + | fixed_pt_const_type + | scoped_name + | octet_type + ; + +/*29*/ +const_exp + : or_expr + ; + +/*30*/ +or_expr + : xor_expr + | or_expr T_VERTICAL_LINE xor_expr + ; + +/*31*/ +xor_expr + : and_expr + | xor_expr T_CIRCUMFLEX and_expr + ; + +/*32*/ +and_expr + : shift_expr + | and_expr T_AMPERSAND shift_expr + ; + +/*33*/ +shift_expr + : add_expr + | shift_expr T_SHIFTRIGHT add_expr + | shift_expr T_SHIFTLEFT add_expr + ; + +/*34*/ +add_expr + : mult_expr + | add_expr T_PLUS_SIGN mult_expr + | add_expr T_MINUS_SIGN mult_expr + ; + +/*35*/ +mult_expr + : unary_expr + | mult_expr T_ASTERISK unary_expr + | mult_expr T_SOLIDUS unary_expr + | mult_expr T_PERCENT_SIGN unary_expr + ; + +/*36*/ +/*37*/ +unary_expr + : T_MINUS_SIGN primary_expr + | T_PLUS_SIGN primary_expr + | T_TILDE primary_expr + | primary_expr + ; + +/*38*/ +primary_expr + : scoped_name + | literal + | T_LEFT_PARANTHESIS const_exp T_RIGHT_PARANTHESIS + ; + +/*39*/ +/*40*/ +literal + : T_INTEGER_LITERAL + | T_string_literal + | T_CHARACTER_LITERAL + | T_FIXED_PT_LITERAL + | T_FLOATING_PT_LITERAL + | T_TRUE /*boolean_literal*/ + | T_FALSE /*boolean_literal*/ + ; + +/*41*/ +positive_int_const + : const_exp + ; + +/*42*/ +/*43*/ +type_dcl + : T_TYPEDEF type_spec declarators + | struct_type + | union_type + | enum_type + | T_NATIVE simple_declarator + ; + +/*44*/ +type_spec + : 
simple_type_spec + | constr_type_spec + ; + +/*45*/ +simple_type_spec + : base_type_spec + | template_type_spec + | scoped_name + ; + +/*46*/ +base_type_spec + : floating_pt_type + | integer_type + | char_type + | wide_char_type + | boolean_type + | octet_type + | any_type + | object_type + | value_base_type + | principal_type /*New*/ + ; + +/*47*/ +template_type_spec + : sequence_type + | string_type + | wide_string_type + | fixed_pt_type + ; + +/*48*/ +constr_type_spec + : struct_type + | union_type + | enum_type + ; + +/*49*/ +declarators + : declarator + | declarator T_COMMA declarators + ; + +/*50*/ +declarator + : simple_declarator + | complex_declarator + ; + +/*51*/ +simple_declarator + : T_IDENTIFIER + ; + +/*52*/ +complex_declarator + : array_declarator + ; + +/*53*/ +floating_pt_type + : T_FLOAT + | T_DOUBLE + | T_LONG T_DOUBLE + ; + +/*54*/ +integer_type + : signed_int + | unsigned_int + ; + +/*55*/ +signed_int + : signed_long_int + | signed_short_int + | signed_longlong_int + ; + +/*56*/ +signed_short_int + : T_SHORT + ; + +/*57*/ +signed_long_int + : T_LONG + ; + +/*58*/ +signed_longlong_int + : T_LONG T_LONG + ; + +/*59*/ +unsigned_int + : unsigned_long_int + | unsigned_short_int + | unsigned_longlong_int + ; + +/*60*/ +unsigned_short_int + : T_UNSIGNED T_SHORT + ; + +/*61*/ +unsigned_long_int + : T_UNSIGNED T_LONG + ; + +/*62*/ +unsigned_longlong_int + : T_UNSIGNED T_LONG T_LONG + ; + +/*63*/ +char_type + : T_CHAR + ; + +/*64*/ +wide_char_type + : T_WCHAR + ; + +/*65*/ +boolean_type + : T_BOOLEAN + ; + +/*66*/ +octet_type + : T_OCTET + ; + +/*67*/ +any_type + : T_ANY + ; + +/*68*/ +object_type + : T_OBJECT + ; + +/*69*/ +struct_type + : T_STRUCT T_IDENTIFIER T_LEFT_CURLY_BRACKET member_list + T_RIGHT_CURLY_BRACKET + ; + +/*70*/ +member_list + : member + | member member_list + ; + +/*71*/ +member + : type_spec declarators T_SEMICOLON + ; + +/*72*/ +union_type + : T_UNION T_IDENTIFIER T_SWITCH T_LEFT_PARANTHESIS + switch_type_spec T_RIGHT_PARANTHESIS T_LEFT_CURLY_BRACKET + switch_body T_RIGHT_CURLY_BRACKET + ; + +/*73*/ +switch_type_spec + : integer_type + | char_type + | boolean_type + | enum_type + | scoped_name + ; + +/*74*/ +switch_body + : case + | case switch_body + ; + +/*75*/ +case + : case_label case + | case_label element_spec T_SEMICOLON + | case_label T_PRAGMA element_spec T_SEMICOLON /* New */ + ; + +/*76*/ +case_label + : T_CASE const_exp T_COLON + | T_DEFAULT T_COLON + ; + +/*77*/ +element_spec + : type_spec declarator + ; + +/*78*/ +enum_type + : T_ENUM T_IDENTIFIER T_LEFT_CURLY_BRACKET enumerators + T_RIGHT_CURLY_BRACKET + ; + +enumerators + : enumerator + | enumerator T_COMMA enumerators + ; + +/*79*/ +enumerator + : T_IDENTIFIER + ; + +/*80*/ +sequence_type + : T_SEQUENCE T_LESS_THAN_SIGN simple_type_spec T_COMMA + positive_int_const T_GREATER_THAN_SIGN + | T_SEQUENCE T_LESS_THAN_SIGN simple_type_spec T_GREATER_THAN_SIGN + ; + +/*81*/ +string_type + : T_STRING T_LESS_THAN_SIGN positive_int_const T_GREATER_THAN_SIGN + | T_STRING + ; + +/*82*/ +wide_string_type + : T_WSTRING T_LESS_THAN_SIGN positive_int_const T_GREATER_THAN_SIGN + | T_WSTRING + ; + +/*83*/ +array_declarator + : T_IDENTIFIER fixed_array_sizes + ; + +fixed_array_sizes + : fixed_array_size + | fixed_array_size fixed_array_sizes + ; + +/*84*/ +fixed_array_size + : T_LEFT_SQUARE_BRACKET positive_int_const T_RIGHT_SQUARE_BRACKET + ; + +/*85*/ +attr_dcl + : T_ATTRIBUTE param_type_spec simple_declarators + | T_READONLY T_ATTRIBUTE param_type_spec simple_declarators + ; + +simple_declarators + : 
simple_declarator + | simple_declarator T_COMMA simple_declarators + ; + +/*86*/ +except_dcl + : T_EXCEPTION T_IDENTIFIER T_LEFT_CURLY_BRACKET members + T_RIGHT_CURLY_BRACKET + ; + +members + : /*empty*/ + | member members + ; + +/*87*/ +op_dcl + : op_attribute op_type_spec T_IDENTIFIER parameter_dcls + raises_expr context_expr + ; + +/*88*/ +op_attribute + : /*empty*/ + | T_ONEWAY + ; + +/*89*/ +op_type_spec + : param_type_spec + | T_VOID + ; + +/*90*/ +parameter_dcls + : T_LEFT_PARANTHESIS param_dcls T_RIGHT_PARANTHESIS + | T_LEFT_PARANTHESIS T_RIGHT_PARANTHESIS + ; + +param_dcls + : param_dcl + | param_dcl T_COMMA param_dcls + ; + +/*91*/ +param_dcl + : param_attribute param_type_spec simple_declarator + ; + +/*92*/ +param_attribute + : T_IN + | T_OUT + | T_INOUT + ; + +/*93*/ +raises_expr + : /*empty*/ + | T_RAISES T_LEFT_PARANTHESIS scoped_names T_RIGHT_PARANTHESIS + ; + +/*94*/ +context_expr + : /*empty*/ + | T_CONTEXT T_LEFT_PARANTHESIS string_literals T_RIGHT_PARANTHESIS + ; + +string_literals + : T_string_literal + | T_string_literal T_COMMA string_literals + ; + +T_string_literal + : T_STRING_LITERAL + | T_STRING_LITERAL T_string_literal + ; + +/*95*/ +param_type_spec + : base_type_spec + | string_type + | wide_string_type + | scoped_name + ; + +/*96*/ +fixed_pt_type + : T_FIXED T_LESS_THAN_SIGN positive_int_const T_COMMA + T_INTEGER_LITERAL T_GREATER_THAN_SIGN + ; + +/*97*/ +fixed_pt_const_type + : T_FIXED + ; + +/*98*/ +value_base_type + : T_VALUEBASE + ; + +/* New production for Principal */ +principal_type + : T_PRINCIPAL + ; + +%% + diff --git a/pidl/Generators/AbstractGenerator.py b/pidl/Generators/AbstractGenerator.py new file mode 100644 index 0000000..36ffcd1 --- /dev/null +++ b/pidl/Generators/AbstractGenerator.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python2.7 +# First implementation in Python for IDL syntax parsing +# vim: noet sw=4 sws=4 ts=4 list + +import os,sys + + +class AbstractGenerator: + def __init__(self,_out): + self.out = _out + + def genCDATA(self,p): + """ Handle CDATA generation""" + + def genNative(self,p): + """ Handle CDATA generation""" + self.out.write('/* Yes Native is there*/\n'); + + def genConst(self,p): + """ Handle CDATA generation""" + + def genStruct(self,p): + """ Handle CDATA generation""" + + def genForward(self,p): + """ Handle CDATA generation""" + + def genTypedef(self,p): + """ Handle CDATA generation""" + + def genComponent(self,p): + """ Handle CDATA generation""" + + def genHome(self,p): + """ Handle CDATA generation""" + + def genEnum(self,p): + """ Handle CDATA generation""" + + def genProduction(self,p): + if p.kind == 'interface': + self.genInterface(p) + + if p.kind == 'forward': + self.genForward(p) + +# if p.kind == 'include': +# # continue + + if p.kind == 'native': + self.genNative(p) + + if p.kind == 'module': + self.genModule(p) + + if p.kind == 'cdata': + self.genCDATA(p) + + if p.kind == 'struct': + self.genStruct(p) + + if p.kind == 'const': + self.genConst(p) + + if p.kind == 'typedef': + self.genTypedef(p) + + if p.kind == 'component': + self.genComponent(p) + + if p.kind == 'home': + self.genHome(p) + + if p.kind == 'enum': + self.genEnum(p) + + + +class AbstractIDLGenerator(AbstractGenerator): + """This is the entry point to generate a complete IDL description """ + def __init__(self,_out,_idl): + AbstractGenerator.__init__(self,_out) + self.idl = _idl + + + """ Implement this method at a lower level to travers the IDL""" + def generate(self): + """TODO""" + + def genInterface(self,iface): + """TODO""" + + def 
genModule(self,module): + """TODO""" + + def genProductions(self): + for p in self.idl.productions: + self.genProduction(p) + + +class AbstractInterfaceGenerator: + def __init__(self,_out,_idl,_interface): + """TODO""" + self.iface = _interface + self.idl = _idl + self.out = _out + def genMethod(self,p): + """TODO""" + def genTypedef(self,p): + pass + def genAttribute(self,p): + pass + def genBody(self): + for m in self.iface.members: + if m.kind == 'method': + self.genMethod(m) + elif m.kind =='const': + self.genTypedef(m) + + def generate(self): + self.out("/*Implement generate for InterfaceGenerator*/\n") + +# Generic component Generator +class AbstractComponentGenerator: + def __init__(self,_out,_idl,_component): + self.component = _component + self.idl = _idl + self.out = _out + + def genProvides(self,p): + """TODO""" + def genUses(self,p): + """TODO""" + def genAttribute(self,p): + pass + def genBody(self): + for p in self.component.provides: + self.genProvides(p) + for p in self.component.uses: + self.genUses(p) + for p in self.component.attributes: + self.genAttributes(p) + + def generate(self): + self.out("/*Implement generate for ComponentGenerator*/\n") + + +class AbstractHomeGenerator: + """Initial abstract interface to generate code for Home + instances + """ + def __init__(self,_out,_idl,_home): + self.home = _home + self.out = _out + self.idl = _idl + + def genFactory(self,factory): + pass + + def genBody(self): + for export in self.home.Exports: + if export.kind == 'factory': + self.genFactory(export) + diff --git a/pidl/Generators/Cpp/__init__.py b/pidl/Generators/Cpp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pidl/Generators/Cpp/header.py b/pidl/Generators/Cpp/header.py new file mode 100644 index 0000000..03f5c32 --- /dev/null +++ b/pidl/Generators/Cpp/header.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python2.7 +# First implementation in Python for IDL syntax parsing +# vim: noet sw=4 sws=4 ts=4 list + +import os,sys + +import Generators.AbstractGenerator +from Generators.AbstractGenerator import AbstractGenerator +from Generators.AbstractGenerator import AbstractIDLGenerator +from Generators.AbstractGenerator import AbstractInterfaceGenerator +from Generators.AbstractGenerator import AbstractComponentGenerator + +header_prolog= """#ifndef %(basename)s__H_ +#define %(basename)s__H_ + +""" + +header_epilog=""" +#endif /* %(basename)s__H_ */ +""" + +forward_decl= """class %(name)s ; /* forward declaration */ +""" + +example_tmpl="""class %(name)s""" + +module_decl = """namespace %(name)s {\n""" + +interface_decl = """ +/** + * @brief interface %(name)s + */ +class %(name)s %(base)s +{ + public: + /// Ctors + %(name)s(); + + virtual ~%(name)s() {}; + +""" + +component_decl = """ +/** + * @brief component %(name)s + */ +class %(name)s : public aeb::lcm:component +{ + public: + /// Ctors + %(name)s(); + + virtual ~%(name)s() {}; + +""" + +struct_header=r""" +/* starting struct %(name)s */ + +""" +iface_header=r""" +/* starting interface %(name)s */ + +""" + +def idl_basename(f): + """ return the basename of the file without the extention """ + return os.path.basename(f).rpartition('.')[0] + +def constdeclAsNative(c): + if c.type == 'string': + return "const char * %s \"%s\";\n" % (c.name,c.getValue()) + else: + return "#define\t%s\t%s\n" % (c.name,c.getValue()) + +def methodAsNative(m): + params = "" + return "%s %s(%s) = 0;" % (m.type, m.name,paramlistAsNative(m)) + + +def paramlistAsNative(m,empty='void'): + l = [ paramAsNative(p) for p in m.params] + if 
len(l) == 0: + return empty + return ", ".join(l) + +def paramAsNative(p): + return "%s%s" % (p.nativeType(),p.name) + + +def write_struct(struct,fd): + def write_member_decl(m): + fd.write(" %s;\n" % m.nativeType()) + + names = {'name' : struct.name + , 'macroname' : struct.name.upper() + } + fd.write(struct_header % names) + fd.write('struct %s\n{\n' % struct.name) + for m in struct.members: + write_member_decl(m) + + fd.write('};\n') + + +class IfaceInterface(AbstractInterfaceGenerator): + def __init__(self,_out,_idl,_iface): + self.iface = _iface + self.out = _out + self.idl = _idl + def genMethod(self,m): + fd = self.out + fd.write("\n /* %s */\n" % m.toIDL() ) + fd.write(" %s\n" % methodAsNative(m) ) + + def genDecl(self,iface): + base = "" + if iface.base: + base = ' : public %s\n' % iface.base + + names = { 'name' : iface.name + , 'base' : base + } + self.out.write(interface_decl % names) + + def generate(self,iface): + fd = self.out + self.iface = iface + # write interface main + names = {'name' : iface.name + , 'macroname' : iface.name.upper() + } + if iface.isLocal(): + fd.write("/* Interface is local */") + fd.write(iface_header % names) + self.genDecl(iface) +# fd.write('class %s' % iface.name) + +# if iface.base: +# fd.write(' : public %s\n{\n public:\n' % iface.base) +# else: +# fd.write('\n{\n public:\n' ) + + self.genBody() + fd.write('};\n') + + + +class IfaceComponent(AbstractComponentGenerator): + def __init__(self,_out,_idl,_comp): + AbstractComponentGenerator.__init__(self,_out,_idl,_comp) + + def genProvides(self,p): + out = self.out + out.write(" %(iface)s provide_%(name)s();\n" % {'name' : p.name,'iface' : p.iface}) + + def genUses(self,p): + out = self.out + out.write(" void connect_%(name)s(%(iface)s *_iface);\n" % {'name' : p.name, 'iface' : p.iface}) + out.write(" void disconnect_%(name)s(%(iface)s *_iface);\n" % {'name' : p.name, 'iface' : p.iface}) + + def genAttributes(self,p): + return + + def generate(self,_comp): + self.component = _comp +# name = {'basename' : idl_basename(self.filename)} +# self.out.write(header_prolog % name) + self.out.write(component_decl % {'name' : _comp.name}) + self.genBody() +# self.out.write(header_epilog % name) + self.out.write('}; /* End component %s */\n' % _comp.name) + + +# Main exported function + +"""Does this work ?""" +class HeaderGenerator(AbstractIDLGenerator): + def __init__(self,_out,_idl,filename): + AbstractIDLGenerator.__init__(self,_out,_idl) + self.filename = filename + self.genIface = IfaceInterface(_out,_idl,None) + self.genIComp = IfaceComponent(_out,_idl,None) + + def genCDATA(self,p): + self.out.write(p.data) + + def genStruct(self,p): + write_struct(p,self.out) + + def genConst(self,p): + self.out.write(constdeclAsNative(p)) + + def genForward(self,p): + self.out.write(forward_decl % { 'name' : p.name}) + + def genInterface(self,iface): + self.genIface.generate(iface) + + def genModule(self,p): + self.out.write(module_decl % { 'name' : p.name}) + for p1 in p.productions: + self.genProduction(p1) + self.out.write('}; /* End namespace */\n') + + def genComponent(self,p): + self.genIComp.generate(p) + return + out = self.out + def write_provides_decl(p): + out.write(" %(name)s provide_%(name)s();\n" % {'name' : p.name}) + out.write(component_decl % {'name' : p.name}) + + def write_uses_decl(p): + out.write(" %(name)s provide_%(name)s();\n" % {'name' : p.name}) + out.write(component_decl % {'name' : p.name}) + + for p1 in p.provides: + write_provides_decl(p1) + out.write('}; /* End component/\n') + + def 
genHome(self,p): + self.out.write("\n/* Don't generated anything for Home %s */\n" % p.name) + + def generate(self): + name = {'basename' : idl_basename(self.filename)} + self.out.write(header_prolog % name) + self.genProductions() + self.out.write(header_epilog % name) + diff --git a/pidl/Generators/Cpp/source.py b/pidl/Generators/Cpp/source.py new file mode 100644 index 0000000..c2ae5f5 --- /dev/null +++ b/pidl/Generators/Cpp/source.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python2.7 +# First implementation in Python for IDL syntax parsing +# vim: noet sw=4 sws=4 ts=4 list + +import os,sys + +import Generators.AbstractGenerator +from Generators.AbstractGenerator import AbstractGenerator +from Generators.AbstractGenerator import AbstractIDLGenerator +from Generators.AbstractGenerator import AbstractComponentGenerator +from Generators.AbstractGenerator import AbstractHomeGenerator + +header_prolog= """#ifndef %(basename)s__H_ +#define %(basename)s__H_ + +""" + +header_epilog=""" +#endif /* %(basename)s__H_ */ +""" + +forward_decl= """class %(name)s ; /* forward declaration */ """ + +example_tmpl="""class %(name)s""" + +module_decl = """namespace %(name)s {\n""" + +method_impl = """%(type)s %(cls)s::%(function)s(%(parameters)s) +{ +} +""" + +struct_header=r""" +/* starting struct %(name)s */ + +""" +iface_header=r""" +/** + * @brief implement skeleton for interface %(name)s + */ + +""" + +component_impl = """ +/** + * @brief component %(name)s + */ + +%(name)s::%(name)s() + +{ +} + +~%(name)s()::%(name)s +{ +} + +""" + + +def idl_basename(f): + """ return the basename of the file without the extention """ + return os.path.basename(f).rpartition('.')[0] + +def constdeclAsNative(c): + return "#define\t%s\t%s\n" % (c.name,c.getValue()) + +def methodAsNative(m): + params = "" + return "%s %s(%s) ;" % (m.type, m.name,paramlistAsNative(m)) + + +def paramlistAsNative(m,empty='void'): + l = [ paramAsNative(p) for p in m.params] + if len(l) == 0: + return empty + return ", ".join(l) + +def paramAsNative(p): + return "%s%s" % (p.nativeType(),p.name) + + +def write_struct(struct,fd): + def write_member_decl(m): + fd.write(" %s;\n" % m.nativeType()) + + names = {'name' : struct.name + , 'macroname' : struct.name.upper() + } + fd.write(struct_header % names) + fd.write('struct %s\n{\n' % struct.name) + for m in struct.members: + write_member_decl(m) + + fd.write('};\n') + + + +class ImplComponent(AbstractComponentGenerator): + def __init__(self,_out,_idl,_comp): + AbstractComponentGenerator.__init__(self,_out,_idl,_comp) + + def genProvides(self,p): + out = self.out + out.write(" %(iface)s provide_%(name)s();\n" % {'name' : p.name,'iface' : p.iface}) + + def genUses(self,p): + out = self.out + out.write("void connect_%(name)s(%(iface)s *_iface)\n{\n}\n" % {'name' : p.name, 'iface' : p.iface}) + out.write("void disconnect_%(name)s(%(iface)s *_iface)\n{\n}\n" % {'name' : p.name, 'iface' : p.iface}) + + def genAttributes(self,p): + return + + def generate(self,_comp): + self.component = _comp +# name = {'basename' : idl_basename(self.filename)} +# self.out.write(header_prolog % name) + self.out.write(component_impl % {'name' : _comp.name}) + self.genBody() +# self.out.write(header_epilog % name) + self.out.write('}; /* End component %s */\n' % _comp.name) + + + +class ImplHome(AbstractHomeGenerator): + """HomeImpl class responsible to generate the c++ ode + for the Home Instace + """ + def __init__(self,_out,_idl,_comp): + AbstractHomeGenerator.__init__(self,_out,_idl,_comp) + + def genFactory(self,p): + out = 
self.out + out.write("create_%(name)s()\n{\n}\n" % {'name' : p.name}) + + def generate(self,_home): + self.home = _home + #self.out.write(component_decl % {'name' : _comp.name}) + self.genBody() + self.out.write('}; /* End component %s */\n' % _home.name) + + +# Main exported function + + +class SourceGenerator(AbstractIDLGenerator): + def __init__(self,_out,_idl,filename): + AbstractIDLGenerator.__init__(self,_out,_idl) + self.filename = filename + self.genIComp = ImplComponent(_out,_idl,None) + self.genIHome = ImplHome(_out,_idl,None) + + + def genCDATA(self,p): + self.out.write(p.data) + + def genComponent(self,p): + self.genIComp.generate(p) + + def genHome(self,p): + self.genIHome.generate(p) + + def genStruct(self,p): + pass + + def genConst(self,p): + pass + + def genForward(self,p): + pass + + def genInterface(self,iface): + fd = self.out + _names = { 'name' : iface.name , 'macroname' : iface.name.upper() } + def write_method_impl(cls,m): + mnames = { 'type' : m.type , 'cls' : cls, 'function' : m.name , 'parameters' : paramlistAsNative(m) } + fd.write("\n/**\n * method %s\n */\n" % m.toIDL() ) + fd.write(method_impl % mnames ) + + # write interface main + fd.write(iface_header % _names) + + for m in iface.members: + if m.kind == 'method': + write_method_impl(iface.name,m) + + + def genModule(self,p): + self.out.write(module_decl % { 'name' : p.name}) + for p1 in p.productions: + self.genProduction(p1) + self.out.write('}; /* End namespace */\n') + + + def generate(self): + name = {'basename' : idl_basename(self.filename)} + self.out.write(header_prolog % name) + self.genProductions() + self.out.write(header_epilog % name) + diff --git a/pidl/Generators/Generator.py b/pidl/Generators/Generator.py new file mode 100644 index 0000000..a6a2b9c --- /dev/null +++ b/pidl/Generators/Generator.py @@ -0,0 +1,25 @@ +import sys, os.path + +import Generators.Cpp.header +import Generators.Cpp.source +from Generators.Cpp.header import HeaderGenerator +from Generators.Cpp.source import SourceGenerator + +class Generator: + + def __init__(self,genType,_out,_idl,_outdir,_outfile): + self.filename = _outfile + self.out = _out + self.idl = _idl + self._createGen(genType) + + def _createGen(self,genType): + if genType == 'header': + self.gen = HeaderGenerator(self.out,self.idl,self.filename) + elif genType == 'source': + self.gen = SourceGenerator(self.out,self.idl,self.filename) + elif genType == 'factory': + self.gen = GenFactory(_idl) + + def generate(self): + self.gen.generate() diff --git a/pidl/Generators/__init__.py b/pidl/Generators/__init__.py new file mode 100644 index 0000000..139597f --- /dev/null +++ b/pidl/Generators/__init__.py @@ -0,0 +1,2 @@ + + diff --git a/pidl/IDLConfig.py b/pidl/IDLConfig.py new file mode 100644 index 0000000..40efe7e --- /dev/null +++ b/pidl/IDLConfig.py @@ -0,0 +1,25 @@ +# +# This module will contain all global +# flags to deal with the bahavior of the parser +# + +class IInterfaceConfig: + + def __init__(self,_uuid ): + self._uuid = 0 + + @property + def uuidRequired(self): + return self._uuid == 1 + + @uuidRequired.setter + def uuidRequired(self,value): + if value == 0: + self._uuid = 0 + else: + self._uuid = 1 + + +# Interface must have UUID, can be +interface = IInterfaceConfig(0) + diff --git a/pidl/IDLLexer.py b/pidl/IDLLexer.py new file mode 100644 index 0000000..a39af4a --- /dev/null +++ b/pidl/IDLLexer.py @@ -0,0 +1,159 @@ +# vim: et sw=4 sws=4 ts=4 list +from ply import * + +import IDLUtils +from IDLUtils import Location,IDLError + +class 
IDLLexer(object): + + keywords = { + 'module' : 'MODULE', + 'const' : 'CONST', + 'interface' : 'INTERFACE', + 'in' : 'IN', + 'inout' : 'INOUT', + 'out' : 'OUT', + 'attribute' : 'ATTRIBUTE', + 'raises' : 'RAISES', + 'readonly' : 'READONLY', + 'native' : 'NATIVE', + 'typedef' : 'TYPEDEF', + 'struct' : 'STRUCT', + 'sequence' : 'SEQUENCE', + 'union' : 'UNION', + 'enum' : 'ENUM', + 'local' : 'LOCAL', + 'abstract' : 'ABSTRACT', + } + + idl3_ccm = { + 'component' : 'COMPONENT', + 'provides' : 'PROVIDES', + 'uses' : 'USES', + 'emits' : 'EMITS', + 'publishes' : 'PUBLISHES', + 'consumes' : 'CONSUMES', + 'home' : 'HOME', + 'manages' : 'MANAGES', + 'factory' : 'FACTORY', + 'supports' : 'SUPPORTS' + } + tokens = [ + 'IDENTIFIER', + 'DIRECTIVE', + 'CDATA', + 'INCLUDE', + 'IID', + 'NUMBER', + 'HEXNUM', + 'LSHIFT', + 'RSHIFT', + 'STRING', + 'NATIVEID' + ] + + keywords.update(idl3_ccm) + tokens.extend(keywords.values()) + + states = ( + ('nativeid', 'exclusive'), + ) + + def __init__(self,outputdir="",debug = 0): + print ("Create Lexer....") + self.lexer = lex.lex(object = self, + outputdir= outputdir, + lextab = 'idllex', optimize = 0) + self._doccomments = [] + + def clearComments(self): + self._doccomments = [] + + hexchar = r'[a-fA-F0-9]' + + t_NUMBER = r'-?\d+' + + t_HEXNUM = r'0x%s+' % hexchar + + t_LSHIFT = r'<<' + + t_RSHIFT = r'>>' + + literals = '"(){}[],;:=|+-*?><' + + t_ignore = ' \t' + + + def t_multilinecomment(self, t): + r'/\*(?s).*?\*/' + t.lexer.lineno += t.value.count('\n') + if t.value.startswith("/**"): + self._doccomments.append(t.value) + + def t_singlelinecomment(self, t): + r'(?m)//.*?$' + + def t_IID(self, t): + return t + t_IID.__doc__ = r'%(c)s{8}-%(c)s{4}-%(c)s{4}-%(c)s{4}-%(c)s{12}' % {'c': hexchar} + + def t_IDENTIFIER(self, t): + r'(unsigned\ long\ long|unsigned\ short|unsigned\ long|long\ long)(?!_?[A-Za-z][A-Za-z_0-9])|_?[A-Za-z][A-Za-z_0-9]*' + t.type = self.keywords.get(t.value, 'IDENTIFIER') + return t + + def t_LCDATA(self, t): + r'(?s)%\{[ ]*C\+\+[ ]*\n(?P.*?\n?)%\}[ ]*(C\+\+)?' 
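+        # Pass-through blocks of the form
+        #     %{ C++
+        #     ...verbatim code...
+        #     %}
+        # are turned into a single CDATA token; the captured block body
+        # (referenced as group('cdata') below) becomes the token value.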
+ t.type = 'CDATA' + t.value = t.lexer.lexmatch.group('cdata') + t.lexer.lineno += t.value.count('\n') + return t + + def t_INCLUDE(self, t): + r'\#include[ \t]+"[^"\n]+"' + inc, value, end = t.value.split('"') + t.value = value + return t + + def t_STRING(self, t): + r'"[^"\n]+"' + begin, value, end = t.value.split('"') + t.value = value + return t + + def t_directive(self, t): + r'\#(?P[a-zA-Z]+)[ ]*(?P[a-zA-Z_]+)[ ]*(?P[a-zA-Z0-9]*)[\n]+' + if t.lexer.lexmatch.group('directive') == "define": + t.type = 'DIRECTIVE' + t.value = { 'directive' : 'define' + , 'name' : t.lexer.lexmatch.group('name') + , 'value' : t.lexer.lexmatch.group('value')} + return t + else: + raise IDLError("Unrecognized directive %s %s" % (t.lexer.lexmatch.group('directive') , t.lexer.lexmatch.group('opt') ), + Location(lexer=self.lexer, lineno=self.lexer.lineno, + lexpos=self.lexer.lexpos)) + + def t_newline(self, t): + r'\n+' + t.lexer.lineno += len(t.value) + + def t_nativeid_NATIVEID(self, t): + r'[^()\n]+(?=\))' + t.lexer.begin('INITIAL') + return t + + t_nativeid_ignore = '' + + + def t_error(self,t): + print ("Token error ") + + def token(self): + t = self.lexer.token() + if t is not None and t.type != 'CDATA': + t.doccomments = self._doccomments +# print "IDLLexer.token %s " % t.type + self._doccomments = [] + return t + diff --git a/pidl/IDLParser.py b/pidl/IDLParser.py new file mode 100755 index 0000000..f9e5d87 --- /dev/null +++ b/pidl/IDLParser.py @@ -0,0 +1,712 @@ +#!/usr/bin/env python2.7 +# First implementation in Python for IDL syntax parsing +# vim: noet sw=4 sws=4 ts=4 list + +import sys, os.path +sys.path.insert(0,"../.") +sys.path.insert(0,"./") +from ply import * + +import hashlib +import IDLLexer +from IDLLexer import * + +import IDLTypes +from IDLTypes import * + +import IDLUtils +from IDLUtils import Location,IDLError,IDLPLogger,NullLogger + +import Generators.Generator +from Generators.Generator import Generator + +# +# Main class +# +class IDLParser(object): + + logger = NullLogger(); + precedence = ( + ('left', '|'), + ('left', 'LSHIFT', 'RSHIFT'), + ('left', '+', '-'), + ('left', '*'), + ('left', 'UMINUS'), + ) + + + + def __init__(self,outputdir='',debug = 1): + if debug: + self.logger = IDLPLogger(sys.stderr) + + self.IDLlexer = IDLLexer(outputdir,debug) + self.lexer = self.IDLlexer.lexer + self.tokens = self.IDLlexer.tokens + self.logger.debug("IDLParser __init__ create parser") + self.parser = yacc.yacc(module = self, + debug = debug, + outputdir = outputdir, + tabmodule ='idlparsertab') + + + + def p_idlfile(self,p): + """idlfile : productions""" + p[0] = IDL(p[1]) + + def p_productions_start(self,p): + """productions : """ + p[0] = [] + + def p_productions_cdata(self,p): + """productions : CDATA productions""" + p[0] = list(p[2]) + p[0].insert(0, CDATA(p[1], self.getLocation(p,1))) + + def p_productions_include(self,p): + """productions : INCLUDE productions""" + p[0] = list(p[2]) + p[0].insert(0, INCLUDE(p[1], self.getLocation(p,1))) + + def p_productions_directive(self,p): + """productions : DIRECTIVE productions""" + p[0] = p[2] + p[0].insert(0, Define(p[1], self.getLocation(p,1))) + + def p_productions_interface(self,p): + """productions : interface productions + | native productions + | comp productions + | home productions + | typedecl productions""" + p[0] = list(p[2]) + p[0].insert(0,p[1]) + + def p_productions_module(self,p): + """productions : module productions""" + print ( "got MODULE len(p)=",len(p)," p[1]=",file=sys.stderr) + p[0] = list(p[2]) + p[0].insert(0,p[1]) + + 
def p_module(self,p): + """module : MODULE IDENTIFIER '{' productions '}' ';'""" + p[0] = Module(_name = p[2] + ,_productions = p[4] + ,_loc = self.getLocation(p,1) + ) + + def p_native(self,p): + """native : annotations NATIVE IDENTIFIER afternativeid '(' NATIVEID ')' ';' """ + p[0] = Native(name = p[3] + , nativename = p[6] + , attlist = p[1]['annotlist'] + , location = self.getLocation(p,1) + ) + + def p_afternativeid(self, p): + """afternativeid : """ + # this is a place marker: we switch the lexer into literal identifier + # mode here, to slurp up everything until the closeparen + self.lexer.begin('nativeid') + + def p_typedecl(self,p): + """typedecl : typedef + | enumtype + | structtype""" + p[0] = p[1] + + def p_typedecl_constdecl(self,p): + """typedecl : CONST IDENTIFIER IDENTIFIER '=' number ';' + | CONST IDENTIFIER IDENTIFIER '=' STRING ';' + """ + _loc = p[5] + if p[2] == "string": + _type = lambda i: _loc + else: + _type = p[5] + p[0] = ConstDecl(type=p[2], name=p[3], + value=_type, location=self.getLocation(p, 1), + doccomments=p.slice[1].doccomments) + + def p_typedef(self,p): + """typedef : TYPEDEF type_spec declarator ';'""" + self.logger.debug("Got typedef %s" % p[3]) + p[0] = Typedef(type=p[2],declarator = p[3],location = self.getLocation(p,1),doccomments = p.slice[1].doccomments) + + def p_type_spec(self,p): + """type_spec : IDENTIFIER + | SEQUENCE '<' IDENTIFIER '>' """ + if len(p) > 2: + #p[0] = p[3] + p[0] = SequenceType(p[3]) + else: + p[0] = p[1] + + def p_declarator(self,p): + """declarator : IDENTIFIER + | IDENTIFIER fixedarraysizes""" + if len(p) > 2: + self.logger.info("Got declarator with SIZES %d" % len(p[2])) + p[0] = Declarator(p[1],p[2]) + else: + p[0] = Declarator(p[1]) + + + def p_fixedarraysizes_start(self,p): + """fixedarraysizes : """ + p[0] = [] + + def p_fixedarraysizes_continue(self,p): + """fixedarraysizes : fixedarraysize fixedarraysizes""" + p[0] = list(p[2]) + p[0].insert(0,p[1]) + + def p_fixedarraysize(self,p): + """fixedarraysize : '[' number ']'""" + p[0] = p[2] + + def p_structtype(self,p): + """structtype : annotations STRUCT IDENTIFIER '{' smembers '}' ';' """ + self.logger.debug("Got Struct %s",p[2]) + if 'doccomments' in p[1]: + doccomments = p[1]['doccomments'] + else: + doccomments = p.slice[2].doccomments + + p[0] = Struct(name = p[3] + , attlist = p[1] + , members = p[5] + , location = self.getLocation(p,1) + , doccomments = doccomments + ) + def p_smembers_start(self,p): + """smembers : """ + p[0] = [] + + def p_smembers_continue(self,p): + """smembers : smember smembers""" + p[0] = list(p[2]) + p[0].insert(0,p[1]) + + def p_smember(self,p): + """smember : annotations type_spec declarator ';' """ + if 'doccomments' in p[1]: + doccomments = p[1]['doccomments'] + else: + #doccomments = p.slice[2].doccomments + doccomments = [] + + p[0] = StructMember( type = p[2] + , name = p[3] + , attlist = p[1]['annotlist'] + , location = self.getLocation(p,1) + , doccomments = doccomments) + + def p_enumtype(self,p): + """enumtype : annotations ENUM IDENTIFIER '{' enumerators '}' ';' """ + self.logger.debug("Got Enum %s",p[2]) + if 'doccomments' in p[1]: + doccomments = p[1]['doccomments'] + else: + doccomments = p.slice[2].doccomments + + p[0] = Enum(name = p[3] + , attlist = p[1] + , enumerators = p[5] + , location = self.getLocation(p,1) + , doccomments = doccomments + ) + + def p_enumerators_start(self,p): + """enumerators : enumerator moreenums + | """ + if len(p) == 1: + p[0] = [] + else: + p[0] = list(p[2]) + p[0].insert(0,p[1]) + + def 
p_moreenums_start(self,p): + """moreenums : """ + p[0] = [] + + def p_moreenums_continue(self,p): + """moreenums : ',' enumerator moreenums""" + p[0] = list(p[3]) + p[0].insert(0,p[2]) + + def p_enumerator(self,p): + """enumerator : IDENTIFIER + | IDENTIFIER '=' number """ + if len(p) > 2: + print >>sys.stderr, "len(p)=",len(p)," p[1]=",p[1] + p[0]= Enumerator(name = p[1] , value = p[2]) + else: + p[0]= Enumerator(p[1]) + + def p_interface(self,p): + """interface : annotations interface_header IDENTIFIER ifacebase ifacebody ';'""" + annotations, INTERFACE,name,base,body,SEMI = p[1:] + annotlist = annotations['annotlist'] + doccomments = [] + l = lambda : self.getLocation(p,2) + + if body is None: + self.logger.debug("Got Forward declartion") + p[0] = Forward(name = name, + location = self.getLocation(p,2), doccomments = doccomments) + else: + self.logger.debug("Got Interface %s" % name ) + p[0] = Interface(name = name, + attlist = annotlist, + base = base, + members = body, + location = l(), + doccomments = doccomments) + p[0].setModifier(INTERFACE['modifier']) + + def p_interface_header(self,p): + """interface_header : INTERFACE + | LOCAL INTERFACE + | ABSTRACT INTERFACE""" + if len(p) > 2: + self.logger.debug("Got Basic interface header") + p[0] = {'modifier' : p[1]} + else: + p[0] = {'modifier' : None} + + def p_ifacebase(self,p): + """ifacebase : ':' IDENTIFIER ifacebase_more + | """ + if len(p) > 1: + p[0] = p[2] + + def p_ifacebase_more_start(self,p): + """ifacebase_more : """ + p[0] = [] + + def p_ifacebase_more_continue(self,p): + """ifacebase_more : ',' IDENTIFIER ifacebase_more""" + p[0] = list(p[3]) + p[0].insert(0,p[2]) + + + def p_ifacebody(self,p): + """ifacebody : '{' imembers '}' + |""" + if len(p)> 1: + p[0] = p[2] + + def p_imembers_start(self,p): + """imembers : """ + p[0] = [] + + def p_imembers_continue(self,p): + """imembers : imember imembers""" + p[0] = list(p[2]) + p[0].insert(0,p[1]) + +# def p_imember(self,p): +# """imember : IDENTIFIER ';'""" + + def p_imember_const(self, p): + """imember : CONST IDENTIFIER IDENTIFIER '=' number ';' """ + self.logger.debug("IDLParser: imember const type=(%s) name=(%s)",p[2],p[3]) + p[0] = ConstMember(type=p[2], name=p[3], + value=p[5], location=self.getLocation(p, 1), + doccomments=p.slice[1].doccomments) + +# All "number" products return a function(interface) + + def p_number_decimal(self, p): + """number : NUMBER""" + n = int(p[1]) + p[0] = lambda i: n + + def p_number_hex(self, p): + """number : HEXNUM""" + n = int(p[1], 16) + p[0] = lambda i: n + + def p_number_identifier(self, p): + """number : IDENTIFIER""" + id = p[1] + loc = self.getLocation(p, 1) + p[0] = lambda i: i.getConst(id, loc) + + def p_number_paren(self, p): + """number : '(' number ')'""" + p[0] = p[2] + + def p_number_neg(self, p): + """number : '-' number %prec UMINUS""" + n = p[2] + p[0] = lambda i: - n(i) + + def p_number_add(self, p): + """number : number '+' number + | number '-' number + | number '*' number""" + n1 = p[1] + n2 = p[3] + if p[2] == '+': + p[0] = lambda i: n1(i) + n2(i) + elif p[2] == '-': + p[0] = lambda i: n1(i) - n2(i) + else: + p[0] = lambda i: n1(i) * n2(i) + + def p_number_shift(self, p): + """number : number LSHIFT number + | number RSHIFT number""" + n1 = p[1] + n2 = p[3] + if p[2] == '<<': + p[0] = lambda i: n1(i) << n2(i) + else: + p[0] = lambda i: n1(i) >> n2(i) + + def p_number_bitor(self, p): + """number : number '|' number""" + n1 = p[1] + n2 = p[3] + p[0] = lambda i: n1(i) | n2(i) + + def p_imember_attr(self,p): + """imember 
: annotations optreadonly ATTRIBUTE IDENTIFIER IDENTIFIER ';'""" + if 'doccomments' in p[1]: + doccomments = p[1]['doccomments'] + elif p[2] is not None: + doccomments = p[2] + else: + doccomments = p.slice[3].doccomments + + p[0] = Attribute( type = p[4] + , name = p[5] + , attlist = p[1]['annotlist'] + , readonly = p[2] is not None + , nullable = False + , defvalue = None + , location = self.getLocation(p,3) + , doccomments = doccomments + ) + + def p_imember_method(self,p): + """imember : annotations IDENTIFIER IDENTIFIER '(' paramlist ')' raises ';'""" + self.logger.debug("Got Method %s" % p[3]) + if 'doccomments' in p[1]: + doccomments = p[1]['doccomments'] + else: + doccomments = p.slice[2].doccomments + + p[0] = Method(type = p[2],name = p[3],attlist =p[1]['annotlist'] + , paramlist = p[5] + , location = self.getLocation(p,3) + , doccomments = doccomments + , raises = p[7] + ) + + def p_paramlist(self,p): + """paramlist : param moreparams + | """ + if len(p) == 1: + p[0] = [ ] + else: + p[0] = list(p[2]) + p[0].insert(0,p[1]) + + def p_moreparams_start(self,p): + """moreparams : """ + p[0] = [] + + def p_moreparams_continue(self,p): + """moreparams : ',' param moreparams""" + p[0] = list(p[3]) + p[0].insert(0,p[2]) + + def p_param(self,p): + """param : annotations paramdirection IDENTIFIER identifier""" + self.logger.debug("Got param type=%s %s",p[2],p[3]) + p[0] = Param( paramtype = p[2] + , type = p[3] + , name = p[4] + , attlist = p[1]['annotlist'] + , location = self.getLocation(p,3) + ) + + def p_paramdirection(self,p): + """paramdirection : IN + | INOUT + | OUT """ + p[0] = p[1] + + def p_optreadonly(self,p): + """optreadonly : READONLY + | """ + if len(p) > 1: + p[0] = p.slice[1].doccomments + else: + p[0] = None + def p_identifier(self,p): + """ identifier : IDENTIFIER""" + p[0] = p[1] + + def p_annotations(self,p): + """annotations : '[' annotlist ']' + | """ + if len(p) == 1: + p[0] = {'annotlist' : []} + else: + p[0] = {'annotlist' : p[2] } + + def p_annotlist_start(self,p): + """annotlist : annotation""" + p[0] = [p[1]] + + def p_annotlist_continue(self,p): + """annotlist : annotation ',' annotlist""" + p[0] = list(p[3]) + p[0].insert(0,p[1]) + + def p_annotation(self,p): + """annotation : IDENTIFIER annotationvalue""" + self.logger.debug("Got annotation %s with value %s" % (p[1],p[2])) + p[0] = (p[1],p[2],self.getLocation(p,1)) + + def p_annotationvalue(self,p): + """annotationvalue : '(' IDENTIFIER ')' + | '(' IID ')' + | """ + if len(p) > 1: + p[0] = p[2] + self.logger.debug("Got annotation value (%s)" % p[2]) + + def p_raises(self,p): + """raises : RAISES '(' idlist ')' + | """ + if len(p) == 1: + p[0] = [] + else: + p[0] = p[3] + + def p_idlist(self,p): + """idlist : IDENTIFIER""" + p[0] = [ p[1] ] + + def p_idlist_continue(self,p): + """idlist : IDENTIFIER ',' idlist""" + p[0] = list(p[3]) + p[0].insert(0,p[1]) +# +# Ok handle component declaration +# + def p_comp(self,p): + """comp : COMPONENT IDENTIFIER component_base component_body ';'""" + self.logger.debug("Got Component name=%s %s",p[2],p[3]) + p[0] = Component(p[2],p[4]) + + def p_component_base(self,p): + """component_base : ':' IDENTIFIER + | """ + p[0] = [] + if len(p) > 1: + p[0] = p[2] + + def p_component_body(self,p): + """component_body : '{' component_exports '}'""" + p[0] = p[2] + + def p_component_exports_start(self,p): + """component_exports : """ + p[0] = [] + + def p_component_exports_continue(self,p): + """component_exports : component_export component_exports""" + p[0] = list(p[2]) + 
p[0].insert(0,p[1]) + + def p_component_export_provides(self,p): + """component_export : PROVIDES scoped_name IDENTIFIER ';'""" + p[0] = ComponentProvides(p[3],p[2]) + + def p_component_export_uses(self,p): + """component_export : USES scoped_name IDENTIFIER ';'""" + p[0] = ComponentUses(p[3],p[2]) + + def p_component_export_attr(self,p): + """component_export : annotations optreadonly ATTRIBUTE IDENTIFIER IDENTIFIER ';'""" + if 'doccomments' in p[1]: + doccomments = p[1]['doccomments'] + elif p[2] is not None: + doccomments = p[2] + else: + doccomments = p.slice[3].doccomments + + p[0] = Attribute( type = p[4] + , name = p[5] + , attlist = p[1]['annotlist'] + , readonly = p[2] is not None + , nullable = False + , defvalue = None + , location = self.getLocation(p,3) + , doccomments = doccomments + ) + + def p_home(self,p): + """home : HOME home_header '{' home_body '}' ';' """ + p[0] = p[2] + p[0].Exports = p[4] + + def p_home_header(self,p): + """home_header : IDENTIFIER MANAGES scoped_name""" + p[0] = Home(p[1]) + return p[0] + + def p_home_body_start(self,p): + """home_body : """ + p[0] = [] + + def p_home_body_continue(self,p): + """home_body : factory_decl home_body""" + p[0] = list(p[2]) + p[0].insert(0,p[1]) + + def p_factory_decl(self,p): + """factory_decl : FACTORY IDENTIFIER '(' factory_params ')' ';'""" + p[0] = Factory(p[2],p[4]) + + def p_factory_params(self,p): + """factory_params : factory_param more_factory_params + | + """ + if len(p) == 1: + p[0] = [] + else: + p[0] = list(p[2]) + p[0].insert(0,p[1]) + + + def p_more_factory_params_start(self,p): + """more_factory_params : """ + p[0] = [] + + def p_more_factory_params_continue(self,p): + """more_factory_params : ',' factory_param more_factory_params """ + p[0] = list(p[3]) + p[0].insert(0,p[2]) + + def p_factory_param(self,p): + """factory_param : IN IDENTIFIER IDENTIFIER""" + p[0] = Param( paramtype = "in" + , type = p[2] + , name = p[3] + , attlist = [ { 'Null' : None , 'Empty' : '' , 'loc' : self.getLocation(p,3)} ] + , location = self.getLocation(p,3) + ) + + def p_scoped_name(self,p): + """scoped_name : IDENTIFIER + | ':' ':' IDENTIFIER + | scoped_name ':' ':' IDENTIFIER """ + if len(p) > 4: + p[0] = p[1] + "::" + p[4] + elif len(p) > 3: + p[0] = p[3] + elif len(p) > 1: + p[0] = p[1] + + def p_error(self,p): + location = Location(self.lexer,p.lineno,p.lexpos) + self.logger.error("IDL Parser error line %s",location) + + def getLocation(self,p,i): + location = Location(self.lexer,self.lexer.lineno,self.lexer.lexpos) + return location + #return Location(self.lexer,p.lineno(i),p.lexpos(i)) + + def parse_files(self,files,debug = 0): + self.logger.debug("IDLParser parse files ... 
debug %d" % debug) + self.parser.error = 0 + p = None + first = files.pop() + self.lexer.filename = first + self.lexer.files = files + fd = open(first) + self.lexer.input(fd.read()) + try: + p = self.parser.parse( + lexer = self.IDLlexer, + debug=debug) + except IDLError as e: + print("Parsing exception " % e) + fd.close() + if self.parser.error: + return None + return p + + + + +if __name__ == '__main__': + from optparse import OptionParser + o = OptionParser() + o.add_option('-I',action='append',dest='incdirs',default=['.'], + help="Directory to search for imported files") + + o.add_option('-D',dest='outdir',default=None, + help="Directory to generate files") + + o.add_option('-d',action='store_true', dest='debug', default=False, + help='Set Debug for parser') + + o.add_option('-f',action="append",dest='files', default=[], + help="File to parse") + + o.add_option('-o', dest='outfile', default=None, + help="Output file (default is stdout)") + + o.add_option('-m', dest='gen', default='header', + help="Generating : \t\t\t\t\t\t\t\t" + "header : c++ header file\t\t\t\t\t\t" + "source : C++ source file\t\t\t\t\t\t" + "lds : lds package definition from interface") + + options,args = o.parse_args() + + if options.outdir is not None: + if not os.path.isdir(options.outdir): + os.mkdir(options.outdir) + + if options.files is None: + print >>sys.stderr, "-f file required" + sys.exit(1) + else: + filename = options.files[0] + + print ("IDLParser Launched copyright 2017-2019, Andre Ebersold") + p = IDLParser() + print ("IDLParser Parsing...") + idl = p.parse_files(options.files,options.debug); + print ("IDLParser ************************") + print ("IDLParser Resolving references ...") + print ("IDLParser ************************") + try: + idl.resolve(".",p) + except IDLError as e: + print ("Resolve Exception : %s " % e) + exit(0) + print ("***\n* Result\n***\n%s" % idl) + + # If output file generate header + if options.outfile is not None: + ofd = open(options.outfile,"w") + closeFd = True + else: + ofd = sys.stdout + closeFd = False + + lgen = Generator(options.gen,ofd,idl,"",filename) + lgen.generate() + if closeFd: + ofd.close() diff --git a/pidl/IDLTypes.py b/pidl/IDLTypes.py new file mode 100644 index 0000000..639a500 --- /dev/null +++ b/pidl/IDLTypes.py @@ -0,0 +1,1368 @@ +# vim: noet sw=4 sws=4 ts=4 list +import re +import IDLUtils +from IDLUtils import Location,IDLError + +import IDLConfig + + +def attlistToIDL(attlist): + if len(attlist) == 0: + return '' + + attlist = list(attlist) + #attlist.sort(cmp=lambda a,b: cmp(a[0], b[0])) + attlist.sort(key=lambda a: a[0]) + + return '[%s] ' % ','.join(["%s%s" % (name, value is not None and '(%s)' % value or '') + for name, value, aloc in attlist]) + +_paramsHardcode = { + 2: ('array', 'shared', 'iid_is', 'size_is', 'retval'), + 3: ('array', 'size_is', 'const'), +} + +def paramAttlistToIDL(attlist): + if len(attlist) == 0: + return '' + + # Hack alert: g_hash_table_foreach is pretty much unimitatable... 
hardcode + # quirk + attlist = list(attlist) + sorted = [] + if len(attlist) in _paramsHardcode: + for p in _paramsHardcode[len(attlist)]: + i = 0 + while i < len(attlist): + if attlist[i][0] == p: + sorted.append(attlist[i]) + del attlist[i] + continue + + i += 1 + + sorted.extend(attlist) + + return '[%s] ' % ', '.join(["%s%s" % (name, value is not None and ' (%s)' % value or '') + for name, value, aloc in sorted]) + +def unaliasType(t): + while t.kind == 'typedef': + t = t.realtype + assert t is not None + return t + +def getBuiltinOrNativeTypeName(t): + t = unaliasType(t) + if t.kind == 'builtin': + return t.name + elif t.kind == 'native': + assert t.specialtype is not None + return '[%s]' % t.specialtype + else: + return None + + +class BuiltinLocation(object): + def get(self): + return "" + + def __str__(self): + return self.get() + +class Builtin(object): + kind = 'builtin' + location = BuiltinLocation + + def __init__(self, name, nativename, signed=False, maybeConst=False): + self.name = name + self.nativename = nativename + self.signed = signed + self.maybeConst = maybeConst + + def isScriptable(self): + return True + + def nativeType(self, calltype, shared=False, const=False): + if const: + print >>sys.stderr, IDLError("[const] doesn't make sense on builtin types.", self.location, warning=True) + const = 'const ' + elif calltype == 'in' and self.nativename.endswith('*'): + const = 'const ' + elif shared: + if not self.nativename.endswith('*'): + raise IDLError("[shared] not applicable to non-pointer types.", self.location) + const = 'const ' + else: + const = '' + return "%s%s %s" % (const, self.nativename, + calltype != 'in' and '*' or '') + +builtinNames = [ + Builtin('boolean', 'bool'), + Builtin('void', 'void'), + Builtin('octet', 'uint8_t'), + Builtin('short', 'int16_t', True, True), + Builtin('long', 'int32_t', True, True), + Builtin('long long', 'int64_t', True, False), + Builtin('unsigned short', 'uint16_t', False, True), + Builtin('unsigned long', 'uint32_t', False, True), + Builtin('unsigned long long', 'uint64_t', False, False), + Builtin('float', 'float', True, False), + Builtin('double', 'double', True, False), + Builtin('char', 'char', True, False), + Builtin('string', 'char *', False, True), + Builtin('wchar', 'PRUnichar', False, False), + Builtin('wstring', 'PRUnichar *', False, False), +] + +builtinMap = {} +for b in builtinNames: + builtinMap[b.name] = b + + +class NameMap(object): + """Map of name -> object. Each object must have a .name and .location property. + Setting the same name twice throws an error.""" + def __init__(self): + self._d = {} + + def __getitem__(self, key): + if key in builtinMap: + return builtinMap[key] + return self._d[key] + + def __iter__(self): + return self._d.itervalues() + + def __contains__(self, key): + return key in builtinMap or key in self._d + + def set(self, object): + if object.name in builtinMap: + raise IDLError("name '%s' is a builtin and cannot be redeclared" % (object.name), object.location) + if object.name.startswith("_"): + object.name = object.name[1:] + if object.name in self._d: + old = self._d[object.name] + if old == object: return + if isinstance(old, Forward) and isinstance(object, Interface): + self._d[object.name] = object + elif isinstance(old, Interface) and isinstance(object, Forward): + pass + else: + raise IDLError("name '%s' specified twice. 
Previous location: %s" % (object.name, self._d[object.name].location), object.location) + else: + self._d[object.name] = object + + def get(self, id, location): + try: + return self[id] + except KeyError: + raise IDLError("NameMpa::get Name '%s' not found", location) + + + + + + +class Include(object): + kind = 'include' + + def __init__(self, filename, location): + self.filename = filename + self.location = location + + def __str__(self): + return "".join(["include '%s'\n" % self.filename]) + + def resolve(self, parent): + def incfiles(): + yield self.filename + for dir in parent.incdirs: + yield os.path.join(dir, self.filename) + + for file in incfiles(): + if not os.path.exists(file): continue + + self.IDL = parent.parser.parse(open(file).read(), filename=file) + self.IDL.resolve(parent.incdirs, parent.parser) + for type in self.IDL.getNames(): + parent.setName(type) + parent.deps.extend(self.IDL.deps) + return + + raise IDLError("Include::File '%s' not found" % self.filename, self.location) + + +class Scope(object): + def __init__(self): + self.namemap = NameMap() + + def setName(self, obj): + #if self.kind == 'module': + #print("Scope<%s>::setName %s" % (self.name,obj.name)) + self.namemap.set(obj) + + def getName(self, id, location): + try: + return self.namemap[id] + except KeyError: + if self.kind == 'module': + #print("Scope<%s>::getName %s" % (self.name,id)) + return self.module.getName(id,location) + else: + raise IDLError("Scope::(kind=%s) type '%s' not found" % (self.kind,id), location) + + def hasName(self, id): + return id in self.namemap + + def getNames(self): + return iter(self.namemap) + + +class IDL(Scope): + kind = 'file' + def __init__(self, productions): + Scope.__init__(self) + self.productions = productions + self.deps = [] + + def getConst(self, name, location): + # The constant in Global Scope + c = self.getName(name, location) + if c.kind != 'const': + raise IDLError("IDL::symbol '%s' is not a constant", c.location) + return c.getValue() + + def __str__(self): + return "".join([str(p) for p in self.productions]) + + def resolve(self, incdirs, parser): + self.namemap = NameMap() + self.incdirs = incdirs + self.parser = parser + for p in self.productions: + p.resolve(self) + + def includes(self): + for p in self.productions: + if p.kind == 'include': + yield p + + def needsJSTypes(self): + for p in self.productions: + if p.kind == 'interface' and p.needsJSTypes(): + return True + return False + +class Module(Scope): + kind ='module' + module = None + idl = None + def __init__(self,_name,_productions,_loc): + Scope.__init__(self) + self.name = _name + self.productions= _productions + self.location = _loc + + def __str__(self): + l = ["module %s\n" % self.name] + for p in self.productions: + l.append(str(p)) + return "\t".join(l) + + def resolve(self, parent): + print("Module:: resolve kind=%s" % self.kind) + if parent.kind == 'file': + self.idl = parent + self.module = parent + else: + self.module = parent + for p in self.productions: + p.resolve(self) + + +class CDATA(object): + kind = 'cdata' + _re = re.compile(r'\n+') + + def __init__(self, data, location): + self.data = self._re.sub('\n', data) + self.location = location + + def resolve(self, parent): + pass + + def __str__(self): + return "cdata: %s\n\t%r\n" % (self.location.get(), self.data) + + def count(self): + return 0 + +class Typedef(object): + kind = 'typedef' + + def __init__(self, type, declarator, location, doccomments): + self.type = type + self.name = declarator.name + self.declarator = declarator 
+ self.location = location + self.doccomments = doccomments + + def __eq__(self, other): + return self.name == other.name and self.type == other.type + + def resolve(self, parent): + parent.setName(self) + if isinstance(self.type,SequenceType): + print("Typedef::Resolve Sequence") + parent.setName(self.type) + self.realtype = parent.getName(self.type, self.location) + self.declarator.resolve(parent) + + def isScriptable(self): + return self.realtype.isScriptable() + + def nativeType(self, calltype): + return "%s %s" % (self.name, + calltype != 'in' and '*' or '') + + def __str__(self): + return "typedef %s %s\n" % (self.type, self.name) + + +class Define(object): + """ Define Directive """ + kind = 'define' + + def __init__(self, name, location, doccomments = ""): + self.name = name['name'] + self.value = name['value'] + self.location = location + self.doccomments = doccomments + + def __eq__(self, other): + return other.kind == 'define' and other.name == self.name + + def resolve(self, parent): + parent.setName(self) + if not self.value == '' and not self.value.isdigit(): + self.realtype = parent.getName(self.value, self.location) + + def nativeType(self, calltype): + return "%s %s" % (self.name, + calltype != 'in' and '*' or '') + + def __str__(self): + return "define %s \"%s\"\n" % (self.name, self.value) + + + +class Forward(object): + """ Forward Declaration Class for Interface and Structs """ + kind = 'forward' + + def __init__(self, name, location, doccomments): + self.name = name + self.location = location + self.doccomments = doccomments + + def __eq__(self, other): + return other.kind == 'forward' and other.name == self.name + + def resolve(self, parent): + # Hack alert: if an identifier is already present, move the doccomments + # forward. 
+ if parent.hasName(self.name): + for i in xrange(0, len(parent.productions)): + if parent.productions[i] is self: break + for i in xrange(i + 1, len(parent.productions)): + if hasattr(parent.productions[i], 'doccomments'): + parent.productions[i].doccomments[0:0] = self.doccomments + break + + parent.setName(self) + + def isScriptable(self): + return True + + def nativeType(self, calltype): + return "%s %s" % (self.name, + calltype != 'in' and '* *' or '*') + + def __str__(self): + return "forward-declared %s\n" % self.name + +class Native(object): + kind = 'native' + + modifier = None + specialtype = None + + specialtypes = { + 'nsid': None, + 'domstring': 'nsAString', + 'utf8string': 'nsACString', + 'cstring': 'nsACString', + 'astring': 'nsAString', + 'jsval': 'JS::Value' + } + + def __init__(self, name, nativename, attlist, location): + self.name = name + self.nativename = nativename + self.location = location + + for name, value, aloc in attlist: + if value is not None: + raise IDLError("Native::Unexpected attribute value", aloc) + if name in ('ptr', 'ref'): + if self.modifier is not None: + raise IDLError("Native::More than one ptr/ref modifier", aloc) + self.modifier = name + elif name in self.specialtypes.keys(): + if self.specialtype is not None: + raise IDLError("Native::More than one special type", aloc) + self.specialtype = name + if self.specialtypes[name] is not None: + self.nativename = self.specialtypes[name] + else: + raise IDLError("Native::Unexpected attribute", aloc) + + def __eq__(self, other): + return self.name == other.name and \ + self.nativename == other.nativename and \ + self.modifier == other.modifier and \ + self.specialtype == other.specialtype + + def resolve(self, parent): + parent.setName(self) + + def isScriptable(self): + if self.specialtype is None: + return False + + if self.specialtype == 'nsid': + return self.modifier is not None + + return self.modifier == 'ref' + + def isPtr(self, calltype): + return self.modifier == 'ptr' or (self.modifier == 'ref' and self.specialtype == 'jsval' and calltype == 'out') + + def isRef(self, calltype): + return self.modifier == 'ref' and not (self.specialtype == 'jsval' and calltype == 'out') + + def nativeType(self, calltype, const=False, shared=False): + if shared: + if calltype != 'out': + raise IDLError("[shared] only applies to out parameters.") + const = True + + if self.specialtype is not None and calltype == 'in': + const = True + + if self.isRef(calltype): + m = '& ' + elif self.isPtr(calltype): + m = '*' + ((self.modifier == 'ptr' and calltype != 'in') and '*' or '') + else: + m = calltype != 'in' and '*' or '' + return "%s%s %s" % (const and 'const ' or '', self.nativename, m) + + def __str__(self): + return "native %s(%s)\n" % (self.name, self.nativename) + +class BaseInterface(object): + def __init__(self, name, attlist, base, members, location, doccomments): + self.name = name + self.attributes = InterfaceAttributes(attlist, location) + self.base = base + if self.kind == 'dictionary': + members.sort(key=lambda x:x.name) + self.members = members + self.location = location + self.namemap = NameMap() + self.doccomments = doccomments + self.nativename = name + + for m in members: + if not isinstance(m, CDATA): + self.namemap.set(m) + + def __eq__(self, other): + return self.name == other.name and self.location == other.location + + def resolve(self, parent): + self.idl = parent + + # Hack alert: if an identifier is already present, libIDL assigns + # doc comments incorrectly. 
This is quirks-mode extraordinaire! + if parent.hasName(self.name): + for member in self.members: + if hasattr(member, 'doccomments'): + member.doccomments[0:0] = self.doccomments + break + self.doccomments = parent.getName(self.name, None).doccomments + + if self.attributes.function: + has_method = False + for member in self.members: + if member.kind == 'method': + if has_method: + raise IDLError("interface '%s' has multiple methods, but marked 'function'" % self.name, self.location) + else: + has_method = True + + parent.setName(self) + if self.base is not None: + realbase = parent.getName(self.base, self.location) + if realbase.kind != self.kind: + raise IDLError("%s '%s' inherits from non-%s type '%s'" % (self.kind, self.name, self.kind, self.base), self.location) + + if self.attributes.scriptable and not realbase.attributes.scriptable: + print >>sys.stderr, IDLError("interface '%s' is scriptable but derives from non-scriptable '%s'" % (self.name, self.base), self.location, warning=True) + + if self.attributes.scriptable and realbase.attributes.builtinclass and not self.attributes.builtinclass: + raise IDLError("interface '%s' is not builtinclass but derives from builtinclass '%s'" % (self.name, self.base), self.location) + + for member in self.members: + member.resolve(self) + + # The number 250 is NOT arbitrary; this number is the maximum number of + # stub entries defined in xpcom/reflect/xptcall/public/genstubs.pl + # Do not increase this value without increasing the number in that + # location, or you WILL cause otherwise unknown problems! + if self.countEntries() > 250 and not self.attributes.builtinclass: + raise IDLError("interface '%s' has too many entries" % self.name, + self.location) + + def isScriptable(self): + # NOTE: this is not whether *this* interface is scriptable... it's + # whether, when used as a type, it's scriptable, which is true of all + # interfaces. + return True + + def setModifier(self,modifier): + self.modifier = modifier + + def isAbstract(self): + return self.modifier == "abstract" + + def isLocal(self): + return self.modifier == "local" + + def nativeType(self, calltype, const=False): + return "%s%s %s" % (const and 'const ' or '', + self.name, + calltype != 'in' and '* *' or '*') + + def __str__(self): + l = ["interface %s\n" % self.name] + if self.base is not None: + l.append("\tbase %s\n" % self.base) + l.append(str(self.attributes)) + if self.members is None: + l.append("\tincomplete type\n") + else: + for m in self.members: + l.append(str(m)) + return "".join(l) + + def getConst(self, name, location): + # The constant may be in a base class + iface = self + while name not in iface.namemap and iface is not None: + iface = self.idl.getName(self.base, self.location) + if iface is None: + raise IDLError("cannot find symbol '%s'" % name, c.location) + c = iface.namemap.get(name, location) + if c.kind != 'const': + raise IDLError("symbol '%s' is not a constant", c.location) + + return c.getValue() + + def needsJSTypes(self): + for m in self.members: + if m.kind == "attribute" and m.type == "jsval": + return True + if m.kind == "method" and m.needsJSTypes(): + return True + return False + + def countEntries(self): + ''' Returns the number of entries in the vtable for this interface. 
''' + total = sum(member.count() for member in self.members) + if self.base is not None: + realbase = self.idl.getName(self.base, self.location) + total += realbase.countEntries() + return total + +class Interface(BaseInterface): + kind = 'interface' + + def __init__(self, name, attlist, base, members, location, doccomments): + BaseInterface.__init__(self, name, attlist, base, members, location, doccomments) + if IDLConfig.interface.uuidRequired: + if self.attributes.uuid is None: + raise IDLError("interface %s has no uuid" % name, location) + +class Dictionary(BaseInterface): + kind = 'dictionary' + + def __init__(self, name, attlist, base, members, location, doccomments): + BaseInterface.__init__(self, name, attlist, base, members, location, doccomments) + +class InterfaceAttributes(object): + uuid = None + scriptable = False + builtinclass = False + function = False + deprecated = False + noscript = False + + def setuuid(self, value): + self.uuid = value.lower() + + def setscriptable(self): + self.scriptable = True + + def setfunction(self): + self.function = True + + def setnoscript(self): + self.noscript = True + + def setbuiltinclass(self): + self.builtinclass = True + + def setdeprecated(self): + self.deprecated = True + + actions = { + 'uuid': (True, setuuid), + 'scriptable': (False, setscriptable), + 'builtinclass': (False, setbuiltinclass), + 'function': (False, setfunction), + 'noscript': (False, setnoscript), + 'deprecated': (False, setdeprecated), + 'object': (False, lambda self: True), + } + + def __init__(self, attlist, location): + def badattribute(self): + raise IDLError("Unexpected interface attribute '%s'" % name, location) + + for name, val, aloc in attlist: + hasval, action = self.actions.get(name, (False, badattribute)) + if hasval: + if val is None: + raise IDLError("Expected value for attribute '%s'" % name, + aloc) + + action(self, val) + else: + if val is not None: + raise IDLError("Unexpected value for attribute '%s'" % name, + aloc) + + action(self) + + def __str__(self): + l = [] + if self.uuid: + l.append("\tuuid: %s\n" % self.uuid) + if self.scriptable: + l.append("\tscriptable\n") + if self.builtinclass: + l.append("\tbuiltinclass\n") + if self.function: + l.append("\tfunction\n") + return "".join(l) + + +class ConstDecl(object): + kind = 'const' + _global = False + idl = None + def __init__(self, type, name, value, location, doccomments): + self.type = type + self.name = name + self.value = value + self.location = location + self.doccomments = doccomments + + def resolve(self, parent): + if type == "string": + self.type = parent.getName(self.type, self.location) + self.realtype = parent.getName(self.type, self.location) + basetype = self.realtype + self.idl = parent + self._global = True + while isinstance(basetype, Typedef): + basetype = basetype.realtype + if isinstance(basetype,Define): + basetype = basetype.realtype + if not isinstance(basetype, Builtin) or not basetype.maybeConst: + raise IDLError("const may only be a short or long type, not %s" % self.type, self.location) + parent.setName(self) + self.basetype = basetype + + def getConst(self, name, location): + # The constant may be in a base class + c = self.idl.getName(name, self.location) + if c.kind != 'const': + raise IDLError("symbol '%s' is not a constant", c.location) + return c.getValue() + + def getValue(self): + _ret ="Exp" + try: + _ret = self.value(self.idl) + except Exception as e: + print("Const %s %s " % (self.name,e)) + exit(0) + return _ret + + def __str__(self): + return "%sconst 
%s %s = %s\n" % ( not self._global and '\t' or '',self.type, self.name, self.getValue()) + + def count(self): + return 0 + +class ConstMember(ConstDecl): + """This class represents a constant member of an interface + """ + def __init__(self,type,name,value,location,doccomments): + ConstDecl.__init__(self,type,name,value,location,doccomments) + + def resolve(self,parent): + self.realtype = parent.idl.getName(self.type, self.location) + self.iface = parent + basetype = self.realtype + while isinstance(basetype, Typedef): + basetype = basetype.realtype + if not isinstance(basetype, Builtin) or not basetype.maybeConst: + raise IDLError("const may only be a short or long type, not %s" % self.type, self.location) + + self.basetype = basetype + + def getValue(self): + return self.value(self.iface) + +class Attribute(object): + """Represents an attribute of the interface + """ + kind = 'attribute' + noscript = False + readonly = False + implicit_jscontext = False + nostdcall = False + binaryname = None + null = None + undefined = None + deprecated = False + nullable = False + infallible = False + defvalue = None + + def __init__(self, type, name, attlist, readonly, nullable, defvalue, location, doccomments): + self.type = type + self.name = name + self.attlist = attlist + self.readonly = readonly + self.nullable = nullable + self.defvalue = defvalue + self.location = location + self.doccomments = doccomments + + for name, value, aloc in attlist: + if name == 'binaryname': + if value is None: + raise IDLError("binaryname attribute requires a value", + aloc) + + self.binaryname = value + continue + + if name == 'Null': + if value is None: + raise IDLError("'Null' attribute requires a value", aloc) + if readonly: + raise IDLError("'Null' attribute only makes sense for setters", + aloc); + if value not in ('Empty', 'Null', 'Stringify'): + raise IDLError("'Null' attribute value must be 'Empty', 'Null' or 'Stringify'", + aloc); + self.null = value + elif name == 'Undefined': + if value is None: + raise IDLError("'Undefined' attribute requires a value", aloc) + if readonly: + raise IDLError("'Undefined' attribute only makes sense for setters", + aloc); + if value not in ('Empty', 'Null'): + raise IDLError("'Undefined' attribute value must be 'Empty' or 'Null'", + aloc); + self.undefined = value + else: + if value is not None: + raise IDLError("Unexpected attribute value", aloc) + + if name == 'noscript': + self.noscript = True + elif name == 'implicit_jscontext': + self.implicit_jscontext = True + elif name == 'deprecated': + self.deprecated = True + elif name == 'nostdcall': + self.nostdcall = True + elif name == 'infallible': + self.infallible = True + else: + raise IDLError("Unexpected attribute '%s'" % name, aloc) + + def resolve(self, iface): + self.iface = iface + self.realtype = iface.idl.getName(self.type, self.location) + if (self.null is not None and + getBuiltinOrNativeTypeName(self.realtype) != '[domstring]'): + raise IDLError("'Null' attribute can only be used on DOMString", + self.location) + if (self.undefined is not None and + getBuiltinOrNativeTypeName(self.realtype) != '[domstring]'): + raise IDLError("'Undefined' attribute can only be used on DOMString", + self.location) + if (self.nullable and + getBuiltinOrNativeTypeName(self.realtype) != '[domstring]'): + raise IDLError("Nullable types (T?) 
is supported only for DOMString", + self.location) + if self.infallible and not self.realtype.kind == 'builtin': + raise IDLError('[infallible] only works on builtin types ' + '(numbers, booleans, and raw char types)', + self.location) + if self.infallible and not iface.attributes.builtinclass: + raise IDLError('[infallible] attributes are only allowed on ' + '[builtinclass] interfaces', + self.location) + + + def toIDL(self): + attribs = attlistToIDL(self.attlist) + readonly = self.readonly and 'readonly ' or '' + return "%s%sattribute %s %s;" % (attribs, readonly, self.type, self.name) + + def isScriptable(self): + if not self.iface.attributes.scriptable: return False + return not self.noscript + + def __str__(self): + return "\t%sattribute %s %s\n" % (self.readonly and 'readonly ' or '', + self.type, self.name) + + def count(self): + return self.readonly and 1 or 2 + +class Method(object): + """Method of an interface. + Contains parameterlit + """ + kind = 'method' + noscript = False + notxpcom = False + binaryname = None + implicit_jscontext = False + nostdcall = False + optional_argc = False + deprecated = False + + def __init__(self, type, name, attlist, paramlist, location, doccomments, raises): + self.type = type + self.name = name + self.attlist = attlist + self.params = paramlist + self.location = location + self.doccomments = doccomments + self.raises = raises + + for name, value, aloc in attlist: + if name == 'binaryname': + if value is None: + raise IDLError("binaryname attribute requires a value", + aloc) + + self.binaryname = value + continue + + if value is not None: + raise IDLError("Unexpected attribute value", aloc) + + if name == 'noscript': + self.noscript = True + elif name == 'notxpcom': + self.notxpcom = True + elif name == 'implicit_jscontext': + self.implicit_jscontext = True + elif name == 'optional_argc': + self.optional_argc = True + elif name == 'deprecated': + self.deprecated = True + elif name == 'nostdcall': + self.nostdcall = True + else: + raise IDLError("Unexpected attribute '%s'" % name, aloc) + + self.namemap = NameMap() + for p in paramlist: + self.namemap.set(p) + + def resolve(self, iface): + self.iface = iface + self.realtype = self.iface.idl.getName(self.type, self.location) + for p in self.params: + p.resolve(self) + for p in self.params: + if p.retval and p != self.params[-1]: + raise IDLError("'retval' parameter '%s' is not the last parameter" % p.name, self.location) + if p.size_is: + found_size_param = False + for size_param in self.params: + if p.size_is == size_param.name: + found_size_param = True + if getBuiltinOrNativeTypeName(size_param.realtype) != 'unsigned long': + raise IDLError("is_size parameter must have type 'unsigned long'", self.location) + if not found_size_param: + raise IDLError("could not find is_size parameter '%s'" % p.size_is, self.location) + + def isScriptable(self): + if not self.iface.attributes.scriptable: return False + return not (self.noscript or self.notxpcom) + + def __str__(self): + return "\t%s %s(%s)\n" % (self.type, self.name, ", ".join([p.name for p in self.params])) + + def toIDL(self): + if len(self.raises): + raises = ' raises (%s)' % ','.join(self.raises) + else: + raises = '' + + return "%s%s %s (%s)%s;" % (attlistToIDL(self.attlist), + self.type, + self.name, + ", ".join([p.toIDL() + for p in self.params]), + raises) + + def needsJSTypes(self): + if self.implicit_jscontext: + return True + if self.type == "jsval": + return True + for p in self.params: + t = p.realtype + if isinstance(t, Native) and 
t.specialtype == "jsval": + return True + return False + + def count(self): + return 1 + +class Param(object): + size_is = None + iid_is = None + const = False + array = False + retval = False + shared = False + optional = False + null = None + undefined = None + + def __init__(self, paramtype, type, name, attlist, location, realtype=None): + self.paramtype = paramtype + self.type = type + self.name = name + self.attlist = attlist + self.location = location + self.realtype = realtype + + for name, value, aloc in attlist: + # Put the value-taking attributes first! + if name == 'size_is': + if value is None: + raise IDLError("'size_is' must specify a parameter", aloc) + self.size_is = value + elif name == 'iid_is': + if value is None: + raise IDLError("'iid_is' must specify a parameter", aloc) + self.iid_is = value + elif name == 'Null': + if value is None: + raise IDLError("'Null' must specify a parameter", aloc) + if value not in ('Empty', 'Null', 'Stringify'): + raise IDLError("'Null' parameter value must be 'Empty', 'Null', or 'Stringify'", + aloc); + self.null = value + elif name == 'Undefined': + if value is None: + raise IDLError("'Undefined' must specify a parameter", aloc) + if value not in ('Empty', 'Null'): + raise IDLError("'Undefined' parameter value must be 'Empty' or 'Null'", + aloc); + self.undefined = value + else: + if value is not None: + raise IDLError("Unexpected value for attribute '%s'" % name, + aloc) + + if name == 'const': + self.const = True + elif name == 'array': + self.array = True + elif name == 'retval': + self.retval = True + elif name == 'shared': + self.shared = True + elif name == 'optional': + self.optional = True + else: + raise IDLError("Unexpected attribute '%s'" % name, aloc) + + def resolve(self, method): + self.realtype = method.iface.idl.getName(self.type, self.location) + if self.array: + self.realtype = Array(self.realtype) + if (self.null is not None and + getBuiltinOrNativeTypeName(self.realtype) != '[domstring]'): + raise IDLError("'Null' attribute can only be used on DOMString", + self.location) + if (self.undefined is not None and + getBuiltinOrNativeTypeName(self.realtype) != '[domstring]'): + raise IDLError("'Undefined' attribute can only be used on DOMString", + self.location) + + def nativeType(self): + kwargs = {} + if self.shared: kwargs['shared'] = True + if self.const: kwargs['const'] = True + + try: + return self.realtype.nativeType(self.paramtype, **kwargs) + except IDLError as e: + raise IDLError(e.message, self.location) + except TypeError as e: + raise IDLError("Unexpected parameter attribute", self.location) + + def toIDL(self): + return "%s%s %s %s" % (paramAttlistToIDL(self.attlist), + self.paramtype, + self.type, + self.name) + def __str__(self): + return "%s %s" % (self.type, self.name) + +class Type(object): + """All Types should inherit from the Type base class + """ + def __init__(self,name): + self.name = name + +class SequenceType(Type): + """Represents a sequence Type""" + kind = 'sequence' + def __init__(self,ofType): + self.ofType = ofType + _name = "seq<%s>" % ofType + Type.__init__(self,_name) + + def __eq__(self,other): + return self.name==other + + def __hash__(self): + return hash(self.name) + + def resolve(self,parent): + print("SequenceType::resolve %s" % self.name) + parent.setName(self) + pass + + def __str__(self): + return self.name + + def nativeType(self, calltype): + return "std::list<%s> %s" % (self.ofType, + calltype != 'in' and '&' or '') + +class Array(Type): + def __init__(self, basetype): + 
Type.__init__(self) + self.type = basetype + + def isScriptable(self): + return self.type.isScriptable() + + def nativeType(self, calltype, const=False): + return "%s%s*" % (const and 'const ' or '', + self.type.nativeType(calltype)) + +class Declarator(object): + array = False + def __init__(self,name,array = None): + self.name = name + if array is not None: + self.array = True + self.array_sizes = array + + def startswith(self,s): + return self.name.startswith(s) + + def resolve(self,parent): + if isinstance(parent,Struct): + self.idl = parent.idl + elif isinstance(parent,IDL): + self.idl = parent + elif isinstance(parent,Module): + print("Declarator::resolve %s TODO: To Finish" % self.name) + self.idl = parent.idl + else: + raise IDLError("Declarator::resolve failed %s" % self.name,BuiltinLocation) + + def nativeType(self): + if self.array: + s="%s%s" % ( self.name,"".join(["[%s]" % (value( self.idl) ) for value in self.array_sizes ])) + else: + s = self.name + return s + + def __str__(self): + return self.nativeType() + +class StructMember(object): + def __init__(self,type,name,attlist,location,doccomments): + self.name = name + self.type = type + self.attlist = attlist + self.location = location + self.doccomments = doccomments + + def resolve(self,parent): + self.struct = parent + if isinstance(self.type,SequenceType): + parent.idl.setName(self.type) + self.realtype = self.struct.idl.getName(self.type, self.location) + self.name.resolve(parent) + pass + + def nativeType(self, shared=False, const=False): + try: + return "%s %s" % (self.realtype.nativeType('in'), self.name.nativeType() + ) + except Exception as e: + print("Exception::StructMember %s failed %s" % (self.name,e)) + exit(0) + + def __str__(self): + return "\t%s %s\n" % ( self.type,self.name) + +class Struct(object): + kind = 'struct' + def __init__(self,name,attlist,members,location,doccomments): + self.name = name + self.members = members + self.location= location + self.doc = doccomments + + def __str__(self): + l = ["struct %s\n" % self.name] + if self.members is None: + l.append("\tincomplete type\n") + else: + for m in self.members: + l.append(str(m)) + return "".join(l) + + def resolve(self, parent): + if isinstance(parent,Module): + self.idl = parent + else: + self.idl = parent + parent.setName(self) + for p in self.members: + p.resolve(self) + pass + + def nativeType(self, calltype, const=False): + return "%sstruct %s %s" % (const and 'const ' or '', + self.name, + calltype != 'in' and '*' or '') + +class Enum(object): + kind = 'enum' + def __init__(self,name,attlist,enumerators,location,doccomments): + self.name = name + self.enumerators = enumerators + self.location = location + self.doccomments = doccomments + + def resolve(self, parent): + self.idl = parent + parent.setName(self) + pass + + def __str__(self): + l = ["enum %s\n" % self.name] + if self.enumerators is None: + l.append("\tincomplete type\n") + else: + for m in self.enumerators: + l.append(str(m)) + return "".join(l) + + def nativeType(self, calltype): + return "%s %s" % (self.name, + calltype != 'in' and '*' or '') + +# +# +# +class Enumerator(object): + def __init__(self,name,value=None): + self.name = name + + def __str__(self): + return "\t%s\n" % self.name + + +class Union(object): + kind = 'union' + def __init__(self,name,attlist,dicri,cases,location,doccomments): + self.name = name + print ("TODO Union %s " % name) + + def resolve(self, parent): + self.idl = parent + pass + + +# +# COMPONENT Types +# + +class Component(object): + kind = 
'component' + def __init__(self,name,_exports): + self.provides = filter(self._filterProvides,_exports) + self.uses = filter(self._filterUses,_exports) + self.attributes = filter(self._filterAttribute,_exports) + self.exports = _exports + self.name = name + + + def _filterProvides(self,export): + if export == None: + return False + if export.kind == 'provides': + return True + else: + return False + + def _filterUses(self,export): + if export == None: + return False + if export.kind == 'uses': + return True + else: + return False + + def _filterAttribute(self,export): + if export == None: + return False + if export.kind == 'attribute': + return True + else: + return False + + def resolve(self,parent): + pass + + def __str__(self): + l = ["Component %s\n" % self.name] + if self.provides is None: + l.append("\tincomplete type\n") + else: + for m in self.exports: + l.append(str(m)) + return "".join(l) + +class ComponentExport(object): + kind = '' + def __init__(self,kind,name,iface): + self.kind = kind + self.name = name + self.iface = iface + +class ComponentProvides(ComponentExport): + def __init__(self,_name,_iface): + ComponentExport.__init__(self,'provides',_name,_iface) + + def __str__(self): + return "\tprovides %s %s\n" % (self.iface, self.name) + + +class ComponentUses(ComponentExport): + def __init__(self,_name,_iface): + ComponentExport.__init__(self,'uses',_name,_iface) + + def __str__(self): + return "\tuses %s %s\n" % (self.iface,self.name) + + +class Home(object): + kind = 'home' + _exports = None + manages = None + supports = None + + def __init__(self,_name,_manages = None): + self.name = _name + + @property + def Exports(self): + return self._exports + + @Exports.setter + def Exports(self,_exports): + self._exports = _exports + + @Exports.getter + def Exports(self): + return self._exports + + def resolve(self,_parent): + pass + + def __str__(self): + l1 = "" + if self._exports == None: + l1 = "home %s\n" % (self.name) + else: + l1 = "home %s\n%s " % ( self.name , "\n".join([str(p) for p in self._exports])) + return l1 + + +class Factory(object): + kind = 'factory' + parameters = None + def __init__(self,_name,_params = None): + self.name = _name + self.parameters = _params + + def resolve(self,_parent): + pass + + def __str__(self): + return "\tfactory %s( %s )\n" % ( self.name, ", ".join([str(p) for p in self.parameters])) + + diff --git a/pidl/IDLUtils.py b/pidl/IDLUtils.py new file mode 100644 index 0000000..9eb6ed2 --- /dev/null +++ b/pidl/IDLUtils.py @@ -0,0 +1,103 @@ +# vim: noet sw=4 sws=4 ts=4 list + +class Location(object): + """ + Location class provides the required of the location where + an error occured during parsing + """ + _line = None + + def __init__(self, lexer, lineno, lexpos): + self._lineno = lineno + self._lexpos = lexpos + self._lexdata = lexer.lexdata + self._file = getattr(lexer, 'filename', "") + + def __eq__(self, other): + return self._lexpos == other._lexpos and \ + self._file == other._file + + def resolve(self): + if self._line: + return + + startofline = self._lexdata.rfind('\n', 0, self._lexpos) + 1 + endofline = self._lexdata.find('\n', self._lexpos, self._lexpos + 80) + self._line = self._lexdata[startofline:endofline] + self._colno = self._lexpos - startofline + + def pointerline(self): + def i(): + for i in range(0, self._colno): + yield " " + yield "^" + + return "".join(i()) + + def get(self): + self.resolve() + return "%s line %s:%s" % (self._file, self._lineno, self._colno) + + def __str__(self): + self.resolve() + return "%s line 
%s:%s\n%s\n%s" % (self._file, self._lineno, self._colno, + self._line, self.pointerline()) + + +class IDLError(Exception): + """ + IDL Parsing Exception + """ + def __init__(self, message, location, warning=False): + self.message = message + self.location = location + self.warning = warning + + def __str__(self): + return "%s: %s, %s" % (self.warning and 'warning' or 'error', + self.message, self.location) + + + +class IDLPLogger(object): + """ + This object is a stand-in for a logging object created by the + logging module. + """ + + def __init__(self, f): + self.f = f + + def empty(self, msg, *args, **kwargs): + return self + + def critical(self, msg, *args, **kwargs): + self.f.write(("CRITI:" +msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNI: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + def _debug(self, msg, *args, **kwargs): + self.f.write('DEBUG: ' + (msg % args) + '\n') + + def _info(self, msg, *args, **kwargs): + self.f.write('INFO : ' + (msg % args) + '\n') + + info = _info + debug = empty + + +class NullLogger(object): + """ + Null logger is used when no output is generated. Does nothing. + """ + def __getattribute__(self, name): + return self + + def __call__(self, *args, **kwargs): + return self + + diff --git a/pidl/__init__.py b/pidl/__init__.py new file mode 100644 index 0000000..139597f --- /dev/null +++ b/pidl/__init__.py @@ -0,0 +1,2 @@ + + diff --git a/pidl/pidl b/pidl/pidl new file mode 100755 index 0000000..397fafe --- /dev/null +++ b/pidl/pidl @@ -0,0 +1 @@ +#!/usr/bin/env python IDLParser.py diff --git a/ply/__init__.py b/ply/__init__.py new file mode 100644 index 0000000..6e53cdd --- /dev/null +++ b/ply/__init__.py @@ -0,0 +1,5 @@ +# PLY package +# Author: David Beazley (dave@dabeaz.com) + +__version__ = '3.9' +__all__ = ['lex','yacc'] diff --git a/ply/cpp.py b/ply/cpp.py new file mode 100644 index 0000000..ade2987 --- /dev/null +++ b/ply/cpp.py @@ -0,0 +1,917 @@ +# ----------------------------------------------------------------------------- +# cpp.py +# +# Author: David Beazley (http://www.dabeaz.com) +# Copyright (C) 2007 +# All rights reserved +# +# This module implements an ANSI-C style lexical preprocessor for PLY. +# ----------------------------------------------------------------------------- +from __future__ import generators + +import sys + +# Some Python 3 compatibility shims +if sys.version_info.major < 3: + STRING_TYPES = (str, unicode) +else: + STRING_TYPES = str + xrange = range + +# ----------------------------------------------------------------------------- +# Default preprocessor lexer definitions. These tokens are enough to get +# a basic preprocessor working. Other modules may import these if they want +# ----------------------------------------------------------------------------- + +tokens = ( + 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND' +) + +literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" + +# Whitespace +def t_CPP_WS(t): + r'\s+' + t.lexer.lineno += t.value.count("\n") + return t + +t_CPP_POUND = r'\#' +t_CPP_DPOUND = r'\#\#' + +# Identifier +t_CPP_ID = r'[A-Za-z_][\w_]*' + +# Integer literal +def CPP_INTEGER(t): + r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)' + return t + +t_CPP_INTEGER = CPP_INTEGER + +# Floating literal +t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' 
+ +# String literal +def t_CPP_STRING(t): + r'\"([^\\\n]|(\\(.|\n)))*?\"' + t.lexer.lineno += t.value.count("\n") + return t + +# Character constant 'c' or L'c' +def t_CPP_CHAR(t): + r'(L)?\'([^\\\n]|(\\(.|\n)))*?\'' + t.lexer.lineno += t.value.count("\n") + return t + +# Comment +def t_CPP_COMMENT1(t): + r'(/\*(.|\n)*?\*/)' + ncr = t.value.count("\n") + t.lexer.lineno += ncr + # replace with one space or a number of '\n' + t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' ' + return t + +# Line comment +def t_CPP_COMMENT2(t): + r'(//.*?(\n|$))' + # replace with '/n' + t.type = 'CPP_WS'; t.value = '\n' + +def t_error(t): + t.type = t.value[0] + t.value = t.value[0] + t.lexer.skip(1) + return t + +import re +import copy +import time +import os.path + +# ----------------------------------------------------------------------------- +# trigraph() +# +# Given an input string, this function replaces all trigraph sequences. +# The following mapping is used: +# +# ??= # +# ??/ \ +# ??' ^ +# ??( [ +# ??) ] +# ??! | +# ??< { +# ??> } +# ??- ~ +# ----------------------------------------------------------------------------- + +_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''') +_trigraph_rep = { + '=':'#', + '/':'\\', + "'":'^', + '(':'[', + ')':']', + '!':'|', + '<':'{', + '>':'}', + '-':'~' +} + +def trigraph(input): + return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input) + +# ------------------------------------------------------------------ +# Macro object +# +# This object holds information about preprocessor macros +# +# .name - Macro name (string) +# .value - Macro value (a list of tokens) +# .arglist - List of argument names +# .variadic - Boolean indicating whether or not variadic macro +# .vararg - Name of the variadic parameter +# +# When a macro is created, the macro replacement token sequence is +# pre-scanned and used to create patch lists that are later used +# during macro expansion +# ------------------------------------------------------------------ + +class Macro(object): + def __init__(self,name,value,arglist=None,variadic=False): + self.name = name + self.value = value + self.arglist = arglist + self.variadic = variadic + if variadic: + self.vararg = arglist[-1] + self.source = None + +# ------------------------------------------------------------------ +# Preprocessor object +# +# Object representing a preprocessor. Contains macro definitions, +# include directories, and other information +# ------------------------------------------------------------------ + +class Preprocessor(object): + def __init__(self,lexer=None): + if lexer is None: + lexer = lex.lexer + self.lexer = lexer + self.macros = { } + self.path = [] + self.temp_path = [] + + # Probe the lexer for selected tokens + self.lexprobe() + + tm = time.localtime() + self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm)) + self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm)) + self.parser = None + + # ----------------------------------------------------------------------------- + # tokenize() + # + # Utility function. 
Given a string of text, tokenize into a list of tokens + # ----------------------------------------------------------------------------- + + def tokenize(self,text): + tokens = [] + self.lexer.input(text) + while True: + tok = self.lexer.token() + if not tok: break + tokens.append(tok) + return tokens + + # --------------------------------------------------------------------- + # error() + # + # Report a preprocessor error/warning of some kind + # ---------------------------------------------------------------------- + + def error(self,file,line,msg): + print("%s:%d %s" % (file,line,msg)) + + # ---------------------------------------------------------------------- + # lexprobe() + # + # This method probes the preprocessor lexer object to discover + # the token types of symbols that are important to the preprocessor. + # If this works right, the preprocessor will simply "work" + # with any suitable lexer regardless of how tokens have been named. + # ---------------------------------------------------------------------- + + def lexprobe(self): + + # Determine the token type for identifiers + self.lexer.input("identifier") + tok = self.lexer.token() + if not tok or tok.value != "identifier": + print("Couldn't determine identifier type") + else: + self.t_ID = tok.type + + # Determine the token type for integers + self.lexer.input("12345") + tok = self.lexer.token() + if not tok or int(tok.value) != 12345: + print("Couldn't determine integer type") + else: + self.t_INTEGER = tok.type + self.t_INTEGER_TYPE = type(tok.value) + + # Determine the token type for strings enclosed in double quotes + self.lexer.input("\"filename\"") + tok = self.lexer.token() + if not tok or tok.value != "\"filename\"": + print("Couldn't determine string type") + else: + self.t_STRING = tok.type + + # Determine the token type for whitespace--if any + self.lexer.input(" ") + tok = self.lexer.token() + if not tok or tok.value != " ": + self.t_SPACE = None + else: + self.t_SPACE = tok.type + + # Determine the token type for newlines + self.lexer.input("\n") + tok = self.lexer.token() + if not tok or tok.value != "\n": + self.t_NEWLINE = None + print("Couldn't determine token for newlines") + else: + self.t_NEWLINE = tok.type + + self.t_WS = (self.t_SPACE, self.t_NEWLINE) + + # Check for other characters used by the preprocessor + chars = [ '<','>','#','##','\\','(',')',',','.'] + for c in chars: + self.lexer.input(c) + tok = self.lexer.token() + if not tok or tok.value != c: + print("Unable to lex '%s' required for preprocessor" % c) + + # ---------------------------------------------------------------------- + # add_path() + # + # Adds a search path to the preprocessor. + # ---------------------------------------------------------------------- + + def add_path(self,path): + self.path.append(path) + + # ---------------------------------------------------------------------- + # group_lines() + # + # Given an input string, this function splits it into lines. Trailing whitespace + # is removed. Any line ending with \ is grouped with the next line. This + # function forms the lowest level of the preprocessor---grouping into text into + # a line-by-line format. 
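+    # For example, the two physical lines
+    #     #define MAX(a,b) \
+    #         ((a) > (b) ? (a) : (b))
+    # are joined into one logical line before being handed to the lexer
+    # (hypothetical input, shown only to illustrate the grouping described above).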
+ # ---------------------------------------------------------------------- + + def group_lines(self,input): + lex = self.lexer.clone() + lines = [x.rstrip() for x in input.splitlines()] + for i in xrange(len(lines)): + j = i+1 + while lines[i].endswith('\\') and (j < len(lines)): + lines[i] = lines[i][:-1]+lines[j] + lines[j] = "" + j += 1 + + input = "\n".join(lines) + lex.input(input) + lex.lineno = 1 + + current_line = [] + while True: + tok = lex.token() + if not tok: + break + current_line.append(tok) + if tok.type in self.t_WS and '\n' in tok.value: + yield current_line + current_line = [] + + if current_line: + yield current_line + + # ---------------------------------------------------------------------- + # tokenstrip() + # + # Remove leading/trailing whitespace tokens from a token list + # ---------------------------------------------------------------------- + + def tokenstrip(self,tokens): + i = 0 + while i < len(tokens) and tokens[i].type in self.t_WS: + i += 1 + del tokens[:i] + i = len(tokens)-1 + while i >= 0 and tokens[i].type in self.t_WS: + i -= 1 + del tokens[i+1:] + return tokens + + + # ---------------------------------------------------------------------- + # collect_args() + # + # Collects comma separated arguments from a list of tokens. The arguments + # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) + # where tokencount is the number of tokens consumed, args is a list of arguments, + # and positions is a list of integers containing the starting index of each + # argument. Each argument is represented by a list of tokens. + # + # When collecting arguments, leading and trailing whitespace is removed + # from each argument. + # + # This function properly handles nested parenthesis and commas---these do not + # define new arguments. + # ---------------------------------------------------------------------- + + def collect_args(self,tokenlist): + args = [] + positions = [] + current_arg = [] + nesting = 1 + tokenlen = len(tokenlist) + + # Search for the opening '('. 
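+        # (Illustration: for the argument tokens of "(x, f(y, z), 3)" the code
+        # below is expected to yield three arguments -- "x", "f(y, z)" and "3" --
+        # because the comma inside the nested parenthesis does not start a new
+        # argument.)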
+ i = 0 + while (i < tokenlen) and (tokenlist[i].type in self.t_WS): + i += 1 + + if (i < tokenlen) and (tokenlist[i].value == '('): + positions.append(i+1) + else: + self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments") + return 0, [], [] + + i += 1 + + while i < tokenlen: + t = tokenlist[i] + if t.value == '(': + current_arg.append(t) + nesting += 1 + elif t.value == ')': + nesting -= 1 + if nesting == 0: + if current_arg: + args.append(self.tokenstrip(current_arg)) + positions.append(i) + return i+1,args,positions + current_arg.append(t) + elif t.value == ',' and nesting == 1: + args.append(self.tokenstrip(current_arg)) + positions.append(i+1) + current_arg = [] + else: + current_arg.append(t) + i += 1 + + # Missing end argument + self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") + return 0, [],[] + + # ---------------------------------------------------------------------- + # macro_prescan() + # + # Examine the macro value (token sequence) and identify patch points + # This is used to speed up macro expansion later on---we'll know + # right away where to apply patches to the value to form the expansion + # ---------------------------------------------------------------------- + + def macro_prescan(self,macro): + macro.patch = [] # Standard macro arguments + macro.str_patch = [] # String conversion expansion + macro.var_comma_patch = [] # Variadic macro comma patch + i = 0 + while i < len(macro.value): + if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist: + argnum = macro.arglist.index(macro.value[i].value) + # Conversion of argument to a string + if i > 0 and macro.value[i-1].value == '#': + macro.value[i] = copy.copy(macro.value[i]) + macro.value[i].type = self.t_STRING + del macro.value[i-1] + macro.str_patch.append((argnum,i-1)) + continue + # Concatenation + elif (i > 0 and macro.value[i-1].value == '##'): + macro.patch.append(('c',argnum,i-1)) + del macro.value[i-1] + continue + elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): + macro.patch.append(('c',argnum,i)) + i += 1 + continue + # Standard expansion + else: + macro.patch.append(('e',argnum,i)) + elif macro.value[i].value == '##': + if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \ + ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \ + (macro.value[i+1].value == macro.vararg): + macro.var_comma_patch.append(i-1) + i += 1 + macro.patch.sort(key=lambda x: x[2],reverse=True) + + # ---------------------------------------------------------------------- + # macro_expand_args() + # + # Given a Macro and list of arguments (each a token list), this method + # returns an expanded version of a macro. The return value is a token sequence + # representing the replacement macro tokens + # ---------------------------------------------------------------------- + + def macro_expand_args(self,macro,args): + # Make a copy of the macro token sequence + rep = [copy.copy(_x) for _x in macro.value] + + # Make string expansion patches. These do not alter the length of the replacement sequence + + str_expansion = {} + for argnum, i in macro.str_patch: + if argnum not in str_expansion: + str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\") + rep[i] = copy.copy(rep[i]) + rep[i].value = str_expansion[argnum] + + # Make the variadic macro comma patch. 
If the variadic macro argument is empty, we get rid + comma_patch = False + if macro.variadic and not args[-1]: + for i in macro.var_comma_patch: + rep[i] = None + comma_patch = True + + # Make all other patches. The order of these matters. It is assumed that the patch list + # has been sorted in reverse order of patch location since replacements will cause the + # size of the replacement sequence to expand from the patch point. + + expanded = { } + for ptype, argnum, i in macro.patch: + # Concatenation. Argument is left unexpanded + if ptype == 'c': + rep[i:i+1] = args[argnum] + # Normal expansion. Argument is macro expanded first + elif ptype == 'e': + if argnum not in expanded: + expanded[argnum] = self.expand_macros(args[argnum]) + rep[i:i+1] = expanded[argnum] + + # Get rid of removed comma if necessary + if comma_patch: + rep = [_i for _i in rep if _i] + + return rep + + + # ---------------------------------------------------------------------- + # expand_macros() + # + # Given a list of tokens, this function performs macro expansion. + # The expanded argument is a dictionary that contains macros already + # expanded. This is used to prevent infinite recursion. + # ---------------------------------------------------------------------- + + def expand_macros(self,tokens,expanded=None): + if expanded is None: + expanded = {} + i = 0 + while i < len(tokens): + t = tokens[i] + if t.type == self.t_ID: + if t.value in self.macros and t.value not in expanded: + # Yes, we found a macro match + expanded[t.value] = True + + m = self.macros[t.value] + if not m.arglist: + # A simple macro + ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded) + for e in ex: + e.lineno = t.lineno + tokens[i:i+1] = ex + i += len(ex) + else: + # A macro with arguments + j = i + 1 + while j < len(tokens) and tokens[j].type in self.t_WS: + j += 1 + if tokens[j].value == '(': + tokcount,args,positions = self.collect_args(tokens[j:]) + if not m.variadic and len(args) != len(m.arglist): + self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) + i = j + tokcount + elif m.variadic and len(args) < len(m.arglist)-1: + if len(m.arglist) > 2: + self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1)) + else: + self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1)) + i = j + tokcount + else: + if m.variadic: + if len(args) == len(m.arglist)-1: + args.append([]) + else: + args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] + del args[len(m.arglist):] + + # Get macro replacement text + rep = self.macro_expand_args(m,args) + rep = self.expand_macros(rep,expanded) + for r in rep: + r.lineno = t.lineno + tokens[i:j+tokcount] = rep + i += len(rep) + del expanded[t.value] + continue + elif t.value == '__LINE__': + t.type = self.t_INTEGER + t.value = self.t_INTEGER_TYPE(t.lineno) + + i += 1 + return tokens + + # ---------------------------------------------------------------------- + # evalexpr() + # + # Evaluate an expression token sequence for the purposes of evaluating + # integral expressions. 
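+    # For example, the tokens of "defined(FOO) && BAR > 1" are rewritten so that
+    # defined(...) becomes 0L or 1L, any identifier left after macro expansion
+    # becomes 0L, "&&"/"||"/"!" become "and"/"or"/"not", and the resulting
+    # string is evaluated with eval() (illustrative walk-through of the method
+    # below).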
+ # ---------------------------------------------------------------------- + + def evalexpr(self,tokens): + # tokens = tokenize(line) + # Search for defined macros + i = 0 + while i < len(tokens): + if tokens[i].type == self.t_ID and tokens[i].value == 'defined': + j = i + 1 + needparen = False + result = "0L" + while j < len(tokens): + if tokens[j].type in self.t_WS: + j += 1 + continue + elif tokens[j].type == self.t_ID: + if tokens[j].value in self.macros: + result = "1L" + else: + result = "0L" + if not needparen: break + elif tokens[j].value == '(': + needparen = True + elif tokens[j].value == ')': + break + else: + self.error(self.source,tokens[i].lineno,"Malformed defined()") + j += 1 + tokens[i].type = self.t_INTEGER + tokens[i].value = self.t_INTEGER_TYPE(result) + del tokens[i+1:j+1] + i += 1 + tokens = self.expand_macros(tokens) + for i,t in enumerate(tokens): + if t.type == self.t_ID: + tokens[i] = copy.copy(t) + tokens[i].type = self.t_INTEGER + tokens[i].value = self.t_INTEGER_TYPE("0L") + elif t.type == self.t_INTEGER: + tokens[i] = copy.copy(t) + # Strip off any trailing suffixes + tokens[i].value = str(tokens[i].value) + while tokens[i].value[-1] not in "0123456789abcdefABCDEF": + tokens[i].value = tokens[i].value[:-1] + + expr = "".join([str(x.value) for x in tokens]) + expr = expr.replace("&&"," and ") + expr = expr.replace("||"," or ") + expr = expr.replace("!"," not ") + try: + result = eval(expr) + except Exception: + self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") + result = 0 + return result + + # ---------------------------------------------------------------------- + # parsegen() + # + # Parse an input string/ + # ---------------------------------------------------------------------- + def parsegen(self,input,source=None): + + # Replace trigraph sequences + t = trigraph(input) + lines = self.group_lines(t) + + if not source: + source = "" + + self.define("__FILE__ \"%s\"" % source) + + self.source = source + chunk = [] + enable = True + iftrigger = False + ifstack = [] + + for x in lines: + for i,tok in enumerate(x): + if tok.type not in self.t_WS: break + if tok.value == '#': + # Preprocessor directive + + # insert necessary whitespace instead of eaten tokens + for tok in x: + if tok.type in self.t_WS and '\n' in tok.value: + chunk.append(tok) + + dirtokens = self.tokenstrip(x[i+1:]) + if dirtokens: + name = dirtokens[0].value + args = self.tokenstrip(dirtokens[1:]) + else: + name = "" + args = [] + + if name == 'define': + if enable: + for tok in self.expand_macros(chunk): + yield tok + chunk = [] + self.define(args) + elif name == 'include': + if enable: + for tok in self.expand_macros(chunk): + yield tok + chunk = [] + oldfile = self.macros['__FILE__'] + for tok in self.include(args): + yield tok + self.macros['__FILE__'] = oldfile + self.source = source + elif name == 'undef': + if enable: + for tok in self.expand_macros(chunk): + yield tok + chunk = [] + self.undef(args) + elif name == 'ifdef': + ifstack.append((enable,iftrigger)) + if enable: + if not args[0].value in self.macros: + enable = False + iftrigger = False + else: + iftrigger = True + elif name == 'ifndef': + ifstack.append((enable,iftrigger)) + if enable: + if args[0].value in self.macros: + enable = False + iftrigger = False + else: + iftrigger = True + elif name == 'if': + ifstack.append((enable,iftrigger)) + if enable: + result = self.evalexpr(args) + if not result: + enable = False + iftrigger = False + else: + iftrigger = True + elif name == 'elif': + if ifstack: + 
if ifstack[-1][0]: # We only pay attention if outer "if" allows this + if enable: # If already true, we flip enable False + enable = False + elif not iftrigger: # If False, but not triggered yet, we'll check expression + result = self.evalexpr(args) + if result: + enable = True + iftrigger = True + else: + self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") + + elif name == 'else': + if ifstack: + if ifstack[-1][0]: + if enable: + enable = False + elif not iftrigger: + enable = True + iftrigger = True + else: + self.error(self.source,dirtokens[0].lineno,"Misplaced #else") + + elif name == 'endif': + if ifstack: + enable,iftrigger = ifstack.pop() + else: + self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") + else: + # Unknown preprocessor directive + pass + + else: + # Normal text + if enable: + chunk.extend(x) + + for tok in self.expand_macros(chunk): + yield tok + chunk = [] + + # ---------------------------------------------------------------------- + # include() + # + # Implementation of file-inclusion + # ---------------------------------------------------------------------- + + def include(self,tokens): + # Try to extract the filename and then process an include file + if not tokens: + return + if tokens: + if tokens[0].value != '<' and tokens[0].type != self.t_STRING: + tokens = self.expand_macros(tokens) + + if tokens[0].value == '<': + # Include <...> + i = 1 + while i < len(tokens): + if tokens[i].value == '>': + break + i += 1 + else: + print("Malformed #include <...>") + return + filename = "".join([x.value for x in tokens[1:i]]) + path = self.path + [""] + self.temp_path + elif tokens[0].type == self.t_STRING: + filename = tokens[0].value[1:-1] + path = self.temp_path + [""] + self.path + else: + print("Malformed #include statement") + return + for p in path: + iname = os.path.join(p,filename) + try: + data = open(iname,"r").read() + dname = os.path.dirname(iname) + if dname: + self.temp_path.insert(0,dname) + for tok in self.parsegen(data,filename): + yield tok + if dname: + del self.temp_path[0] + break + except IOError: + pass + else: + print("Couldn't find '%s'" % filename) + + # ---------------------------------------------------------------------- + # define() + # + # Define a new macro + # ---------------------------------------------------------------------- + + def define(self,tokens): + if isinstance(tokens,STRING_TYPES): + tokens = self.tokenize(tokens) + + linetok = tokens + try: + name = linetok[0] + if len(linetok) > 1: + mtype = linetok[1] + else: + mtype = None + if not mtype: + m = Macro(name.value,[]) + self.macros[name.value] = m + elif mtype.type in self.t_WS: + # A normal macro + m = Macro(name.value,self.tokenstrip(linetok[2:])) + self.macros[name.value] = m + elif mtype.value == '(': + # A macro with arguments + tokcount, args, positions = self.collect_args(linetok[1:]) + variadic = False + for a in args: + if variadic: + print("No more arguments may follow a variadic argument") + break + astr = "".join([str(_i.value) for _i in a]) + if astr == "...": + variadic = True + a[0].type = self.t_ID + a[0].value = '__VA_ARGS__' + variadic = True + del a[1:] + continue + elif astr[-3:] == "..." and a[0].type == self.t_ID: + variadic = True + del a[1:] + # If, for some reason, "." 
is part of the identifier, strip off the name for the purposes + # of macro expansion + if a[0].value[-3:] == '...': + a[0].value = a[0].value[:-3] + continue + if len(a) > 1 or a[0].type != self.t_ID: + print("Invalid macro argument") + break + else: + mvalue = self.tokenstrip(linetok[1+tokcount:]) + i = 0 + while i < len(mvalue): + if i+1 < len(mvalue): + if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##': + del mvalue[i] + continue + elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS: + del mvalue[i+1] + i += 1 + m = Macro(name.value,mvalue,[x[0].value for x in args],variadic) + self.macro_prescan(m) + self.macros[name.value] = m + else: + print("Bad macro definition") + except LookupError: + print("Bad macro definition") + + # ---------------------------------------------------------------------- + # undef() + # + # Undefine a macro + # ---------------------------------------------------------------------- + + def undef(self,tokens): + id = tokens[0].value + try: + del self.macros[id] + except LookupError: + pass + + # ---------------------------------------------------------------------- + # parse() + # + # Parse input text. + # ---------------------------------------------------------------------- + def parse(self,input,source=None,ignore={}): + self.ignore = ignore + self.parser = self.parsegen(input,source) + + # ---------------------------------------------------------------------- + # token() + # + # Method to return individual tokens + # ---------------------------------------------------------------------- + def token(self): + try: + while True: + tok = next(self.parser) + if tok.type not in self.ignore: return tok + except StopIteration: + self.parser = None + return None + +if __name__ == '__main__': + import ply.lex as lex + lexer = lex.lex() + + # Run a preprocessor + import sys + f = open(sys.argv[1]) + input = f.read() + + p = Preprocessor(lexer) + p.parse(input,sys.argv[1]) + while True: + tok = p.token() + if not tok: break + print(p.source, tok) + + + + + + + + + + + diff --git a/ply/ctokens.py b/ply/ctokens.py new file mode 100644 index 0000000..f6f6952 --- /dev/null +++ b/ply/ctokens.py @@ -0,0 +1,133 @@ +# ---------------------------------------------------------------------- +# ctokens.py +# +# Token specifications for symbols in ANSI C and C++. This file is +# meant to be used as a library in other tokenizers. +# ---------------------------------------------------------------------- + +# Reserved words + +tokens = [ + # Literals (identifier, integer constant, float constant, string constant, char const) + 'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER', + + # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) + 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO', + 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', + 'LOR', 'LAND', 'LNOT', + 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', + + # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) + 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', + 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', + + # Increment/decrement (++,--) + 'INCREMENT', 'DECREMENT', + + # Structure dereference (->) + 'ARROW', + + # Ternary operator (?) + 'TERNARY', + + # Delimeters ( ) [ ] { } , . ; : + 'LPAREN', 'RPAREN', + 'LBRACKET', 'RBRACKET', + 'LBRACE', 'RBRACE', + 'COMMA', 'PERIOD', 'SEMI', 'COLON', + + # Ellipsis (...) 
+ 'ELLIPSIS', +] + +# Operators +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_MODULO = r'%' +t_OR = r'\|' +t_AND = r'&' +t_NOT = r'~' +t_XOR = r'\^' +t_LSHIFT = r'<<' +t_RSHIFT = r'>>' +t_LOR = r'\|\|' +t_LAND = r'&&' +t_LNOT = r'!' +t_LT = r'<' +t_GT = r'>' +t_LE = r'<=' +t_GE = r'>=' +t_EQ = r'==' +t_NE = r'!=' + +# Assignment operators + +t_EQUALS = r'=' +t_TIMESEQUAL = r'\*=' +t_DIVEQUAL = r'/=' +t_MODEQUAL = r'%=' +t_PLUSEQUAL = r'\+=' +t_MINUSEQUAL = r'-=' +t_LSHIFTEQUAL = r'<<=' +t_RSHIFTEQUAL = r'>>=' +t_ANDEQUAL = r'&=' +t_OREQUAL = r'\|=' +t_XOREQUAL = r'\^=' + +# Increment/decrement +t_INCREMENT = r'\+\+' +t_DECREMENT = r'--' + +# -> +t_ARROW = r'->' + +# ? +t_TERNARY = r'\?' + +# Delimeters +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LBRACKET = r'\[' +t_RBRACKET = r'\]' +t_LBRACE = r'\{' +t_RBRACE = r'\}' +t_COMMA = r',' +t_PERIOD = r'\.' +t_SEMI = r';' +t_COLON = r':' +t_ELLIPSIS = r'\.\.\.' + +# Identifiers +t_ID = r'[A-Za-z_][A-Za-z0-9_]*' + +# Integer literal +t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' + +# Floating literal +t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' + +# String literal +t_STRING = r'\"([^\\\n]|(\\.))*?\"' + +# Character constant 'c' or L'c' +t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\'' + +# Comment (C-Style) +def t_COMMENT(t): + r'/\*(.|\n)*?\*/' + t.lexer.lineno += t.value.count('\n') + return t + +# Comment (C++-Style) +def t_CPPCOMMENT(t): + r'//.*\n' + t.lexer.lineno += 1 + return t + + + + + + diff --git a/ply/lex.py b/ply/lex.py new file mode 100644 index 0000000..0f3e464 --- /dev/null +++ b/ply/lex.py @@ -0,0 +1,1100 @@ +# ----------------------------------------------------------------------------- +# ply: lex.py +# +# Copyright (C) 2001-2016 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ----------------------------------------------------------------------------- + +__version__ = '3.9' +__tabversion__ = '3.8' + +import re +import sys +import types +import copy +import os +import inspect + +# This tuple contains known string types +try: + # Python 2.6 + StringTypes = (types.StringType, types.UnicodeType) +except AttributeError: + # Python 3.0 + StringTypes = (str, bytes) + +# This regular expression is used to match valid token names +_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') + +# Exception thrown when invalid token encountered and no default error +# handler is defined. +class LexError(Exception): + def __init__(self, message, s): + self.args = (message,) + self.text = s + + +# Token class. This class is used to represent the tokens produced. +class LexToken(object): + def __str__(self): + return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) + + def __repr__(self): + return str(self) + + +# This object is a stand-in for a logging object created by the +# logging module. + +class PlyLogger(object): + def __init__(self, f): + self.f = f + + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + info = critical + debug = critical + + +# Null logger is used when no output is generated. Does nothing. +class NullLogger(object): + def __getattribute__(self, name): + return self + + def __call__(self, *args, **kwargs): + return self + + +# ----------------------------------------------------------------------------- +# === Lexing Engine === +# +# The following Lexer class implements the lexer runtime. There are only +# a few public methods and attributes: +# +# input() - Store a new string in the lexer +# token() - Get the next token +# clone() - Clone the lexer +# +# lineno - Current line number +# lexpos - Current position in the input string +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression. 
This is a list of + # tuples (re, findex) where re is a compiled + # regular expression and findex is a list + # mapping regex group numbers to rules + self.lexretext = None # Current regular expression strings + self.lexstatere = {} # Dictionary mapping lexer states to master regexs + self.lexstateretext = {} # Dictionary mapping lexer states to regex strings + self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names + self.lexstate = 'INITIAL' # Current lexer state + self.lexstatestack = [] # Stack of lexer states + self.lexstateinfo = None # State information + self.lexstateignore = {} # Dictionary of ignored characters for each state + self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state + self.lexreflags = 0 # Optional re compile flags + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through + self.lexmodule = None # Module + self.lineno = 1 # Current line number + self.lexoptimize = False # Optimized mode + + def clone(self, object=None): + c = copy.copy(self) + + # If the object parameter has been supplied, it means we are attaching the + # lexer to a new object. In this case, we have to rebind all methods in + # the lexstatere and lexstateerrorf tables. + + if object: + newtab = {} + for key, ritem in self.lexstatere.items(): + newre = [] + for cre, findex in ritem: + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) + newtab[key] = newre + c.lexstatere = newtab + c.lexstateerrorf = {} + for key, ef in self.lexstateerrorf.items(): + c.lexstateerrorf[key] = getattr(object, ef.__name__) + c.lexmodule = object + return c + + # ------------------------------------------------------------ + # writetab() - Write lexer information to a table file + # ------------------------------------------------------------ + def writetab(self, lextab, outputdir=''): + if isinstance(lextab, types.ModuleType): + raise IOError("Won't overwrite existing lextab module") + basetabmodule = lextab.split('.')[-1] + filename = os.path.join(outputdir, basetabmodule) + '.py' + with open(filename, 'w') as tf: + tf.write('# %s.py. This file automatically created by PLY (version %s). 
Don\'t edit!\n' % (basetabmodule, __version__)) + tf.write('_tabversion = %s\n' % repr(__tabversion__)) + tf.write('_lextokens = set(%s)\n' % repr(tuple(self.lextokens))) + tf.write('_lexreflags = %s\n' % repr(self.lexreflags)) + tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) + tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) + + # Rewrite the lexstatere table, replacing function objects with function names + tabre = {} + for statename, lre in self.lexstatere.items(): + titem = [] + for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): + titem.append((retext, _funcs_to_names(func, renames))) + tabre[statename] = titem + + tf.write('_lexstatere = %s\n' % repr(tabre)) + tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) + + taberr = {} + for statename, ef in self.lexstateerrorf.items(): + taberr[statename] = ef.__name__ if ef else None + tf.write('_lexstateerrorf = %s\n' % repr(taberr)) + + tabeof = {} + for statename, ef in self.lexstateeoff.items(): + tabeof[statename] = ef.__name__ if ef else None + tf.write('_lexstateeoff = %s\n' % repr(tabeof)) + + # ------------------------------------------------------------ + # readtab() - Read lexer information from a tab file + # ------------------------------------------------------------ + def readtab(self, tabfile, fdict): + if isinstance(tabfile, types.ModuleType): + lextab = tabfile + else: + exec('import %s' % tabfile) + lextab = sys.modules[tabfile] + + if getattr(lextab, '_tabversion', '0.0') != __tabversion__: + raise ImportError('Inconsistent PLY version') + + self.lextokens = lextab._lextokens + self.lexreflags = lextab._lexreflags + self.lexliterals = lextab._lexliterals + self.lextokens_all = self.lextokens | set(self.lexliterals) + self.lexstateinfo = lextab._lexstateinfo + self.lexstateignore = lextab._lexstateignore + self.lexstatere = {} + self.lexstateretext = {} + for statename, lre in lextab._lexstatere.items(): + titem = [] + txtitem = [] + for pat, func_name in lre: + titem.append((re.compile(pat, lextab._lexreflags | re.VERBOSE), _names_to_funcs(func_name, fdict))) + + self.lexstatere[statename] = titem + self.lexstateretext[statename] = txtitem + + self.lexstateerrorf = {} + for statename, ef in lextab._lexstateerrorf.items(): + self.lexstateerrorf[statename] = fdict[ef] + + self.lexstateeoff = {} + for statename, ef in lextab._lexstateeoff.items(): + self.lexstateeoff[statename] = fdict[ef] + + self.begin('INITIAL') + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self, s): + # Pull off the first character to see if s looks like a string + c = s[:1] + if not isinstance(c, StringTypes): + raise ValueError('Expected a string') + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + + # ------------------------------------------------------------ + # begin() - Changes the lexing state + # ------------------------------------------------------------ + def begin(self, state): + if state not in self.lexstatere: + raise ValueError('Undefined state') + self.lexre = self.lexstatere[state] + self.lexretext = self.lexstateretext[state] + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) + self.lexstate = state + + # ------------------------------------------------------------ + # push_state() - 
Changes the lexing state and saves old on stack + # ------------------------------------------------------------ + def push_state(self, state): + self.lexstatestack.append(self.lexstate) + self.begin(state) + + # ------------------------------------------------------------ + # pop_state() - Restores the previous state + # ------------------------------------------------------------ + def pop_state(self): + self.begin(self.lexstatestack.pop()) + + # ------------------------------------------------------------ + # current_state() - Returns the current lexing state + # ------------------------------------------------------------ + def current_state(self): + return self.lexstate + + # ------------------------------------------------------------ + # skip() - Skip ahead n characters + # ------------------------------------------------------------ + def skip(self, n): + self.lexpos += n + + # ------------------------------------------------------------ + # opttoken() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def token(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This code provides some short-circuit code for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue + + # Create a token for return + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexpos = lexpos + + i = m.lastindex + func, tok.type = lexindexfunc[i] + + if not func: + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break + + lexpos = m.end() + + # If token is processed by a function, call it + + tok.lexer = self # Set additional attributes useful in token rules + self.lexmatch = m + self.lexpos = lexpos + + newtok = func(tok) + + # Every function must return a token, if nothing, we just move to next token + if not newtok: + lexpos = self.lexpos # This is here in case user has updated lexpos. + lexignore = self.lexignore # This is here in case there was a state change + break + + # Verify type of the token. If not in the token map, raise an error + if not self.lexoptimize: + if newtok.type not in self.lextokens_all: + raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( + func.__code__.co_filename, func.__code__.co_firstlineno, + func.__name__, newtok.type), lexdata[lexpos:]) + + return newtok + else: + # No match, see if in literals + if lexdata[lexpos] in self.lexliterals: + tok = LexToken() + tok.value = lexdata[lexpos] + tok.lineno = self.lineno + tok.type = tok.value + tok.lexpos = lexpos + self.lexpos = lexpos + 1 + return tok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = 'error' + tok.lexer = self + tok.lexpos = lexpos + self.lexpos = lexpos + newtok = self.lexerrorf(tok) + if lexpos == self.lexpos: + # Error method didn't change text position at all. This is an error. + raise LexError("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + lexpos = self.lexpos + if not newtok: + continue + return newtok + + self.lexpos = lexpos + raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok + + self.lexpos = lexpos + 1 + if self.lexdata is None: + raise RuntimeError('No input string given with input()') + return None + + # Iterator interface + def __iter__(self): + return self + + def next(self): + t = self.token() + if t is None: + raise StopIteration + return t + + __next__ = next + +# ----------------------------------------------------------------------------- +# ==== Lex Builder === +# +# The functions and classes below are used to collect lexing information +# and build a Lexer object from it. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- +def get_caller_module_dict(levels): + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict + +# ----------------------------------------------------------------------------- +# _funcs_to_names() +# +# Given a list of regular expression functions, this converts it to a list +# suitable for output to a table file +# ----------------------------------------------------------------------------- +def _funcs_to_names(funclist, namelist): + result = [] + for f, name in zip(funclist, namelist): + if f and f[0]: + result.append((name, f[1])) + else: + result.append(f) + return result + +# ----------------------------------------------------------------------------- +# _names_to_funcs() +# +# Given a list of regular expression function names, this converts it back to +# functions. +# ----------------------------------------------------------------------------- +def _names_to_funcs(namelist, fdict): + result = [] + for n in namelist: + if n and n[0]: + result.append((fdict[n[0]], n[1])) + else: + result.append(n) + return result + +# ----------------------------------------------------------------------------- +# _form_master_re() +# +# This function takes a list of all of the regex components and attempts to +# form the master regular expression. Given limitations in the Python re +# module, it may be necessary to break the master regex into separate expressions. 
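+#
+# A rough illustration of the strategy (the rule names here are assumed, not
+# taken from any particular grammar): given
+# relist = [r'(?P<t_NUMBER>\d+)', r'(?P<t_PLUS>\+)'], the pieces are joined
+# with '|' and compiled as a single verbose regex. If that compile raises
+# (for example, because the combined pattern has more named groups than the
+# re module will accept), the list is split in half and each half is retried
+# recursively, yielding several smaller master expressions.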
+# ----------------------------------------------------------------------------- +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [] + regex = '|'.join(relist) + try: + lexre = re.compile(regex, re.VERBOSE | reflags) + + # Build the index to function map for the matching engine + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) + lexindexnames = lexindexfunc[:] + + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) + if type(handle) in (types.FunctionType, types.MethodType): + lexindexfunc[i] = (handle, toknames[f]) + lexindexnames[i] = f + elif handle is not None: + lexindexnames[i] = f + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) + else: + lexindexfunc[i] = (None, toknames[f]) + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] + except Exception: + m = int(len(relist)/2) + if m == 0: + m = 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) + +# ----------------------------------------------------------------------------- +# def _statetoken(s,names) +# +# Given a declaration name s of the form "t_" and a dictionary whose keys are +# state names, this function returns a tuple (states,tokenname) where states +# is a tuple of state names and tokenname is the name of the token. For example, +# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') +# ----------------------------------------------------------------------------- +def _statetoken(s, names): + nonstate = 1 + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break + + if i > 1: + states = tuple(parts[1:i]) + else: + states = ('INITIAL',) + + if 'ANY' in states: + states = tuple(names) + + tokenname = '_'.join(parts[i:]) + return (states, tokenname) + + +# ----------------------------------------------------------------------------- +# LexerReflect() +# +# This class represents information needed to build a lexer as extracted from a +# user's input file. 
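+#
+# For orientation, a minimal sketch of the kind of user module this class
+# inspects (all names below are illustrative, not part of this package):
+#
+#     tokens = ('NUMBER', 'PLUS')
+#     literals = ['(', ')']
+#     t_PLUS = r'\+'
+#     def t_NUMBER(t):
+#         r'\d+'
+#         t.value = int(t.value)
+#         return t
+#     def t_error(t):
+#         t.lexer.skip(1)
+#
+# get_tokens(), get_literals(), get_states() and get_rules() below pull these
+# symbols out of the module dictionary, and the validate_*() methods check
+# them before the master regular expressions are formed.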
+# ----------------------------------------------------------------------------- +class LexerReflect(object): + def __init__(self, ldict, log=None, reflags=0): + self.ldict = ldict + self.error_func = None + self.tokens = [] + self.reflags = reflags + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log + + # Get all of the basic information + def get_all(self): + self.get_tokens() + self.get_literals() + self.get_states() + self.get_rules() + + # Validate all of the information + def validate_all(self): + self.validate_tokens() + self.validate_literals() + self.validate_rules() + return self.error + + # Get the tokens map + def get_tokens(self): + tokens = self.ldict.get('tokens', None) + if not tokens: + self.log.error('No token list is defined') + self.error = True + return + + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = tokens + + # Validate the tokens + def validate_tokens(self): + terminals = {} + for n in self.tokens: + if not _is_identifier.match(n): + self.log.error("Bad token name '%s'", n) + self.error = True + if n in terminals: + self.log.warning("Token '%s' multiply defined", n) + terminals[n] = 1 + + # Get the literals specifier + def get_literals(self): + self.literals = self.ldict.get('literals', '') + if not self.literals: + self.literals = '' + + # Validate literals + def validate_literals(self): + try: + for c in self.literals: + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error('Invalid literal %s. Must be a single character', repr(c)) + self.error = True + + except TypeError: + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True + + def get_states(self): + self.states = self.ldict.get('states', None) + # Build statemap + if self.states: + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')", repr(s)) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %s must be a string', repr(name)) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State '%s' already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype + + # Get all of the symbols with a t_ prefix and sort them into various + # categories (functions, strings, error functions, and ignore characters) + + def get_rules(self): + tsymbols = [f for f in self.ldict if f[:2] == 't_'] + + # Now build up a list of functions and a list of strings + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state + + for s in self.stateinfo: + self.funcsym[s] = [] + self.strsym[s] = [] + + if len(tsymbols) == 0: + self.log.error('No rules of the form t_rulename are defined') + self.error = True + return + + for f in tsymbols: + t = self.ldict[f] + states, tokname = _statetoken(f, self.stateinfo) + self.toknames[f] = tokname + + if hasattr(t, '__call__'): + if tokname == 'error': + for s in states: + self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t + elif tokname == 'ignore': + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) + self.error = True + else: + for s in states: + self.funcsym[s].append((f, t)) + elif isinstance(t, StringTypes): + if tokname == 'ignore': + for s in states: + self.ignore[s] = t + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) + + elif tokname == 'error': + self.log.error("Rule '%s' must be defined as a function", f) + self.error = True + else: + for s in states: + self.strsym[s].append((f, t)) + else: + self.log.error('%s not defined as a function or string', f) + self.error = True + + # Sort the functions by line number + for f in self.funcsym.values(): + f.sort(key=lambda x: x[1].__code__.co_firstlineno) + + # Sort the strings by regular expression length + for s in self.strsym.values(): + s.sort(key=lambda x: len(x[1]), reverse=True) + + # Validate all of the t_rules collected + def validate_rules(self): + for state in self.stateinfo: + # Validate all rules defined by functions + + for fname, f in self.funcsym[state]: + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + tokname = self.toknames[fname] + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = f.__code__.co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True + continue + + if nargs < reqargs: + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True + continue + + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) + self.error = True + continue + + try: + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), re.VERBOSE | 
self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) + self.error = True + + # Validate all rules defined by strings + for name, r in self.strsym[state]: + tokname = self.toknames[name] + if tokname == 'error': + self.log.error("Rule '%s' must be defined as a function", name) + self.error = True + continue + + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) + self.error = True + continue + + try: + c = re.compile('(?P<%s>%s)' % (name, r), re.VERBOSE | self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule '%s' matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule '%s'. %s", name, e) + if '#' in r: + self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) + self.error = True + + if not self.funcsym[state] and not self.strsym[state]: + self.log.error("No rules defined for state '%s'", state) + self.error = True + + # Validate the error function + efunc = self.errorf.get(state, None) + if efunc: + f = efunc + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = f.__code__.co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True + + if nargs < reqargs: + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True + + for module in self.modules: + self.validate_module(module) + + # ----------------------------------------------------------------------------- + # validate_module() + # + # This checks to see if there are duplicated t_rulename() functions or strings + # in the parser input file. This is done using a simple regular expression + # match on each line in the source code of the given module. + # ----------------------------------------------------------------------------- + + def validate_module(self, module): + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + return + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) + if not m: + m = sre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) + self.error = True + linen += 1 + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', + reflags=0, nowarn=False, outputdir=None, debuglog=None, errorlog=None): + + if lextab is None: + lextab = 'lextab' + + global lexer + + ldict = None + stateinfo = {'INITIAL': 'inclusive'} + lexobj = Lexer() + lexobj.lexoptimize = optimize + global token, input + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + if debug: + if debuglog is None: + debuglog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the lexer + if object: + module = object + + # Get the module dictionary used for the parser + if module: + _items = [(k, getattr(module, k)) for k in dir(module)] + ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ + else: + ldict = get_caller_module_dict(2) + + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = ldict.get('__package__') + if pkg and isinstance(lextab, str): + if '.' not in lextab: + lextab = pkg + '.' + lextab + + # Collect parser information from the dictionary + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) + linfo.get_all() + if not optimize: + if linfo.validate_all(): + raise SyntaxError("Can't build lexer") + + if optimize and lextab: + try: + lexobj.readtab(lextab, ldict) + token = lexobj.token + input = lexobj.input + lexer = lexobj + return lexobj + + except ImportError: + pass + + # Dump some basic debugging information + if debug: + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) + + # Build a dictionary of valid token names + lexobj.lextokens = set() + for n in linfo.tokens: + lexobj.lextokens.add(n) + + # Get literals specification + if isinstance(linfo.literals, (list, tuple)): + lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) + else: + lexobj.lexliterals = linfo.literals + + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + + # Get the stateinfo dictionary + stateinfo = linfo.stateinfo + + regexs = {} + # Build the master regular expressions + for state in stateinfo: + regex_list = [] + + # Add rules defined by functions first + for fname, f in linfo.funcsym[state]: + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) + + # Now add all of the simple rules + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) + + regexs[state] = regex_list + + # Build the master regular expressions + + if debug: + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') + + for state in regexs: + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) + lexobj.lexstatere[state] = lexre + 
lexobj.lexstateretext[state] = re_text + lexobj.lexstaterenames[state] = re_names + if debug: + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) + + # For inclusive states, we need to add the regular expressions from the INITIAL state + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + + lexobj.lexstateinfo = stateinfo + lexobj.lexre = lexobj.lexstatere['INITIAL'] + lexobj.lexretext = lexobj.lexstateretext['INITIAL'] + lexobj.lexreflags = reflags + + # Set up ignore variables + lexobj.lexstateignore = linfo.ignore + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') + + # Set up error functions + lexobj.lexstateerrorf = linfo.errorf + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) + if not lexobj.lexerrorf: + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) + + # Check state information for ignore and error rules + for s, stype in stateinfo.items(): + if stype == 'exclusive': + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state '%s'", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + elif stype == 'inclusive': + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') + + # Create global versions of the token() and input() functions + token = lexobj.token + input = lexobj.input + lexer = lexobj + + # If in optimize mode, we write the lextab + if lextab and optimize: + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If lextab specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(lextab, types.ModuleType): + srcfile = lextab.__file__ + else: + if '.' not in lextab: + srcfile = ldict['__file__'] + else: + parts = lextab.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + try: + lexobj.writetab(lextab, outputdir) + except IOError as e: + errorlog.warning("Couldn't write lextab module %r. 
%s" % (lextab, e)) + + return lexobj + +# ----------------------------------------------------------------------------- +# runmain() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None, data=None): + if not data: + try: + filename = sys.argv[1] + f = open(filename) + data = f.read() + f.close() + except IndexError: + sys.stdout.write('Reading from standard input (type EOF to end):\n') + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while True: + tok = _token() + if not tok: + break + sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) + +# ----------------------------------------------------------------------------- +# @TOKEN(regex) +# +# This decorator function can be used to set the regex expression on a function +# when its docstring might need to be set in an alternative way +# ----------------------------------------------------------------------------- + +def TOKEN(r): + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) + else: + f.regex = r + return f + return set_regex + +# Alternative spelling of the TOKEN decorator +Token = TOKEN + diff --git a/ply/yacc.py b/ply/yacc.py new file mode 100644 index 0000000..6842832 --- /dev/null +++ b/ply/yacc.py @@ -0,0 +1,3492 @@ +# ----------------------------------------------------------------------------- +# ply: yacc.py +# +# Copyright (C) 2001-2016 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. The grammer is specified by supplying the BNF inside +# Python documentation strings. The inspiration for this technique was borrowed +# from John Aycock's Spark parsing system. 
PLY might be viewed as cross between +# Spark and the GNU bison utility. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). +# +# This implementation supports both SLR and LALR(1) parsing. LALR(1) +# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), +# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, +# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced +# by the more efficient DeRemer and Pennello algorithm. +# +# :::::::: WARNING ::::::: +# +# Construction of LR parsing tables is fairly complicated and expensive. +# To make this module run fast, a *LOT* of work has been put into +# optimization---often at the expensive of readability and what might +# consider to be good Python "coding style." Modify the code at your +# own risk! +# ---------------------------------------------------------------------------- + +import re +import types +import sys +import os.path +import inspect +import base64 +import warnings + +__version__ = '3.9' +__tabversion__ = '3.8' + +#----------------------------------------------------------------------------- +# === User configurable parameters === +# +# Change these to modify the default behavior of yacc (if you wish) +#----------------------------------------------------------------------------- + +yaccdebug = True # Debugging mode. If set, yacc generates a + # a 'parser.out' file in the current directory + +debug_file = 'parser.out' # Default name of the debugging file +tab_module = 'parsetab' # Default name of the table module +default_lr = 'LALR' # Default LR table generation method + +error_count = 3 # Number of symbols that must be shifted to leave recovery mode + +yaccdevel = False # Set to True if developing yacc. This turns off optimized + # implementations of certain functions. + +resultlimit = 40 # Size limit of results when running in debug mode. + +pickle_protocol = 0 # Protocol to use when writing pickle files + +# String type-checking compatibility +if sys.version_info[0] < 3: + string_types = basestring +else: + string_types = str + +MAXINT = sys.maxsize + +# This object is a stand-in for a logging object created by the +# logging module. PLY will use this by default to create things +# such as the parser.out file. If a user wants more detailed +# information, they can create their own logging object and pass +# it into PLY. + +class PlyLogger(object): + def __init__(self, f): + self.f = f + + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + info = debug + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + critical = debug + +# Null logger is used when no output is generated. Does nothing. +class NullLogger(object): + def __getattribute__(self, name): + return self + + def __call__(self, *args, **kwargs): + return self + +# Exception raised for yacc-related errors +class YaccError(Exception): + pass + +# Format the result message that the parser produces when running in debug mode. 
+def format_result(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) > resultlimit: + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) + return result + +# Format stack entries when the parser is running in debug mode +def format_stack_entry(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) < 16: + return repr_str + else: + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + +# Panic mode error recovery support. This feature is being reworked--much of the +# code here is to offer a deprecation/backwards compatible transition + +_errok = None +_token = None +_restart = None +_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). +Instead, invoke the methods on the associated parser instance: + + def p_error(p): + ... + # Use parser.errok(), parser.token(), parser.restart() + ... + + parser = yacc.yacc() +''' + +def errok(): + warnings.warn(_warnmsg) + return _errok() + +def restart(): + warnings.warn(_warnmsg) + return _restart() + +def token(): + warnings.warn(_warnmsg) + return _token() + +# Utility function to call the p_error() function with some deprecation hacks +def call_errorfunc(errorfunc, token, parser): + global _errok, _token, _restart + _errok = parser.errok + _token = parser.token + _restart = parser.restart + r = errorfunc(token) + try: + del _errok, _token, _restart + except NameError: + pass + return r + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) +# .lexpos = Starting lex position +# .endlexpos = Ending lex position (optional, set automatically) + +class YaccSymbol: + def __str__(self): + return self.type + + def __repr__(self): + return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) +# representing the range of positional information for a symbol. 
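+#
+# A small usage sketch (the rule below is illustrative only, not part of any
+# grammar shipped here):
+#
+#     def p_expr_plus(p):
+#         'expr : expr PLUS term'
+#         p[0] = p[1] + p[3]        # index access reads/writes the .value slots
+#         line = p.lineno(2)        # line number of the PLUS token, or 0
+#         span = p.lexspan(1)       # (lexpos, endlexpos) of the left operand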
+ +class YaccProduction: + def __init__(self, s, stack=None): + self.slice = s + self.stack = stack + self.lexer = None + self.parser = None + + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): + self.slice[n].value = v + + def __getslice__(self, i, j): + return [s.value for s in self.slice[i:j]] + + def __len__(self): + return len(self.slice) + + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) + + def set_lineno(self, n, lineno): + self.slice[n].lineno = lineno + + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline + + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) + + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + + def error(self): + raise SyntaxError + +# ----------------------------------------------------------------------------- +# == LRParser == +# +# The LR Parsing engine. +# ----------------------------------------------------------------------------- + +class LRParser: + def __init__(self, lrtab, errorf): + self.productions = lrtab.lr_productions + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True + + def errok(self): + self.errorok = True + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$end' + self.symstack.append(sym) + self.statestack.append(0) + + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can make a choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). + # + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + if debug or yaccdevel: + if isinstance(debug, int): + debug = PlyLogger(sys.stderr) + return self.parsedebug(input, lexer, debug, tracking, tokenfunc) + elif tracking: + return self.parseopt(input, lexer, debug, tracking, tokenfunc) + else: + return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) + + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parsedebug(). + # + # This is the debugging enabled version of parse(). All changes made to the + # parsing engine should be made here. Optimized versions of this function + # are automatically created by the ply/ygen.py script. This script cuts out + # sections enclosed in markers such as this: + # + # #--! DEBUG + # statements + # #--! DEBUG + # + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
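+    #
+    # Note: this path is reached through parse() above: passing a true value
+    # (or a logger-style object) as its debug argument routes the call here
+    # rather than to the generated parseopt()/parseopt_notrack() variants.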
+ + def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parsedebug-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + #--! DEBUG + debug.info('PLY: PARSE DEBUG START') + #--! DEBUG + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . import lex + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + + # Set up the state and symbol stacks + + statestack = [] # Stack of parsing states + self.statestack = statestack + symstack = [] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + #--! DEBUG + debug.debug('') + debug.debug('State : %s', state) + #--! DEBUG + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + #--! DEBUG + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + #--! DEBUG + + #--! DEBUG + debug.debug('Stack : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + #--! DEBUG + debug.debug('Action : Shift and goto state %s', t) + #--! DEBUG + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: + errorcount -= 1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + #--! DEBUG + if plen: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) + else: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) + + #--! DEBUG + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + #--! 
TRACKING + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + #--! TRACKING + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING + + targ = [sym] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + self.state = state + p.callable(pslice) + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + #--! DEBUG + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') + #--! DEBUG + return result + + if t is None: + + #--! DEBUG + debug.error('Error : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. 
The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING + lookahead = None + continue + + # Create the error symbol for the first time and make it the new lookahead symbol + t = YaccSymbol() + t.type = 'error' + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING + statestack.pop() + state = statestack[-1] + + continue + + # Call an error function here + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parsedebug-end + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parseopt(). + # + # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! + # This code is automatically generated by the ply/ygen.py script. Make + # changes to the parsedebug() method instead. + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . 
import lex + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + + # Set up the state and symbol stacks + + statestack = [] # Stack of parsing states + self.statestack = statestack + symstack = [] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: + errorcount -= 1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + #--! TRACKING + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + #--! TRACKING + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING + + targ = [sym] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. 
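The reduce path in this loop hands pslice to the grammar author's rule function through p.callable(pslice). For orientation, a minimal sketch of the kind of grammar that supplies those callables, assuming the standard upstream ply package layout (every token and rule name below is illustrative):

import ply.lex as lex
import ply.yacc as yacc

tokens = ('NUM', 'PLUS')            # terminal names
t_PLUS = r'\+'
t_ignore = ' \t'

def t_NUM(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_error(t):
    t.lexer.skip(1)

precedence = (('left', 'PLUS'),)    # consumed by the Grammar precedence machinery defined later in this file

def p_expr_plus(p):
    'expr : expr PLUS expr'
    # This body is what p.callable(pslice) runs during a reduce;
    # p[0] becomes the value carried by the new 'expr' symbol.
    p[0] = p[1] + p[3]

def p_expr_num(p):
    'expr : NUM'
    p[0] = p[1]

def p_error(tok):
    print('syntax error:', tok)

lexer = lex.lex()
parser = yacc.yacc()                # builds the action/goto tables walked above
print(parser.parse('1+2+3', lexer=lexer))   # -> 6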
+ + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + self.state = state + p.callable(pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + return result + + if t is None: + + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING + lookahead = None + continue + + # Create the error symbol for the first time and make it the new lookahead symbol + t = YaccSymbol() + t.type = 'error' + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + sym = symstack.pop() + #--! 
TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING + statestack.pop() + state = statestack[-1] + + continue + + # Call an error function here + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-end + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # parseopt_notrack(). + # + # Optimized version of parseopt() with line number tracking removed. + # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated + # by the ply/ygen.py script. Make changes to the parsedebug() method instead. + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-notrack-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . import lex + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + if tokenfunc is None: + # Tokenize function + get_token = lexer.token + else: + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + + # Set up the state and symbol stacks + + statestack = [] # Stack of parsing states + self.statestack = statestack + symstack = [] # Stack of grammar symbols + self.symstack = symstack + + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: + errorcount -= 1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. 
Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + else: + + + targ = [sym] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + self.state = state + p.callable(pslice) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set. Enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + return result + + if t is None: + + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we call call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. The token is + # discarded and we just keep going. 
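The recovery cases handled here implement classic yacc panic mode. A hedged sketch of how a grammar normally cooperates with it (rule and token names are hypothetical): naming the reserved 'error' symbol in a rule gives the engine a place to resynchronize, and p_error() is the hook invoked through call_errorfunc() on the first error:

def p_statement_bad(p):
    'statement : error SEMI'
    # The engine shifts the synthesized 'error' symbol here and discards
    # input until this rule can complete; parsing resumes after the SEMI.
    print('recovered from a malformed statement')

def p_error(tok):
    # tok is None at end of input.  If the handler signals recovery and
    # returns a token, that token becomes the next lookahead (see above).
    if tok is None:
        print('syntax error at end of input')
    else:
        print('syntax error near', tok.type)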
+ + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + lookahead = None + continue + + # Create the error symbol for the first time and make it the new lookahead symbol + t = YaccSymbol() + t.type = 'error' + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + sym = symstack.pop() + statestack.pop() + state = statestack[-1] + + continue + + # Call an error function here + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-notrack-end + +# ----------------------------------------------------------------------------- +# === Grammar Representation === +# +# The following functions, classes, and variables are used to represent and +# manipulate the rules that make up a grammar. +# ----------------------------------------------------------------------------- + +# regex matching identifiers +_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. +# A grammar rule refers to a specification such as this: +# +# expr : expr PLUS term +# +# Here are the basic attributes defined on all productions +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','PLUS','term'] +# prec - Production precedence level +# number - Production number. +# func - Function that executes on reduce +# file - File where production function is defined +# lineno - Line number where production function is defined +# +# The following attributes are defined or optional. 
+# +# len - Length of the production (number of symbols on right hand side) +# usyms - Set of unique symbols found in the production +# ----------------------------------------------------------------------------- + +class Production(object): + reduced = 0 + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): + self.name = name + self.prod = tuple(prod) + self.number = number + self.func = func + self.callable = None + self.file = file + self.line = line + self.prec = precedence + + # Internal settings used during table construction + + self.len = len(self.prod) # Length of the production + + # Create a list of unique production symbols used in the production + self.usyms = [] + for s in self.prod: + if s not in self.usyms: + self.usyms.append(s) + + # List of all LR items for the production + self.lr_items = [] + self.lr_next = None + + # Create a string representation + if self.prod: + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + self.str = '%s -> ' % self.name + + def __str__(self): + return self.str + + def __repr__(self): + return 'Production(' + str(self) + ')' + + def __len__(self): + return len(self.prod) + + def __nonzero__(self): + return 1 + + def __getitem__(self, index): + return self.prod[index] + + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. + try: + p.lr_after = Prodnames[p.prod[n+1]] + except (IndexError, KeyError): + p.lr_after = [] + try: + p.lr_before = p.prod[n-1] + except IndexError: + p.lr_before = None + return p + + # Bind the production function name to a callable + def bind(self, pdict): + if self.func: + self.callable = pdict[self.func] + +# This class serves as a minimal standin for Production objects when +# reading table data from files. It only contains information +# actually used by the LR parsing engine, plus some additional +# debugging information. +class MiniProduction(object): + def __init__(self, str, name, len, func, file, line): + self.name = name + self.len = len + self.func = func + self.callable = None + self.file = file + self.line = line + self.str = str + + def __str__(self): + return self.str + + def __repr__(self): + return 'MiniProduction(%s)' % self.str + + # Bind the production function name to a callable + def bind(self, pdict): + if self.func: + self.callable = pdict[self.func] + + +# ----------------------------------------------------------------------------- +# class LRItem +# +# This class represents a specific stage of parsing a production rule. For +# example: +# +# expr : expr . PLUS term +# +# In the above, the "." represents the current location of the parse. Here +# basic attributes: +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] +# number - Production number. +# +# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' +# then lr_next refers to 'expr -> expr PLUS . term' +# lr_index - LR item index (location of the ".") in the prod list. 
+# lookaheads - LALR lookahead symbols for this item +# len - Length of the production (number of symbols on right hand side) +# lr_after - List of all productions that immediately follow +# lr_before - Grammar symbol immediately before +# ----------------------------------------------------------------------------- + +class LRItem(object): + def __init__(self, p, n): + self.name = p.name + self.prod = list(p.prod) + self.number = p.number + self.lr_index = n + self.lookaheads = {} + self.prod.insert(n, '.') + self.prod = tuple(self.prod) + self.len = len(self.prod) + self.usyms = p.usyms + + def __str__(self): + if self.prod: + s = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + s = '%s -> ' % self.name + return s + + def __repr__(self): + return 'LRItem(' + str(self) + ')' + +# ----------------------------------------------------------------------------- +# rightmost_terminal() +# +# Return the rightmost terminal from a list of symbols. Used in add_production() +# ----------------------------------------------------------------------------- +def rightmost_terminal(symbols, terminals): + i = len(symbols) - 1 + while i >= 0: + if symbols[i] in terminals: + return symbols[i] + i -= 1 + return None + +# ----------------------------------------------------------------------------- +# === GRAMMAR CLASS === +# +# The following class represents the contents of the specified grammar along +# with various computed properties such as first sets, follow sets, LR items, etc. +# This data is used for critical parts of the table generation process later. +# ----------------------------------------------------------------------------- + +class GrammarError(YaccError): + pass + +class Grammar(object): + def __init__(self, terminals): + self.Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + self.Prodmap = {} # A dictionary that is only used to detect duplicate + # productions. + + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + for term in terminals: + self.Terminals[term] = [] + + self.Terminals['error'] = [] + + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + self.First = {} # A dictionary of precomputed FIRST(x) symbols + + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols + + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. + + self.Start = None # Starting symbol for the grammar + + + def __len__(self): + return len(self.Productions) + + def __getitem__(self, index): + return self.Productions[index] + + # ----------------------------------------------------------------------------- + # set_precedence() + # + # Sets the precedence for a given terminal. assoc is the associativity such as + # 'left','right', or 'nonassoc'. level is a numeric level. 
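For illustration, a minimal sketch that drives this Grammar class directly, the way yacc.yacc() does from the p_* rule functions, using helper methods defined further below; the import path and every symbol name here are assumptions:

# Assumed import path; the file's final module location is not shown in this diff.
from ply.yacc import Grammar

g = Grammar(['NUM', 'PLUS'])                 # terminal names
g.add_production('expr', ['expr', 'PLUS', 'term'])
g.add_production('expr', ['term'])
g.add_production('term', ['NUM'])
g.set_start('expr')                          # production 0 becomes  S' -> expr

print(g.Productions[1])                      # expr -> expr PLUS term
print(g.undefined_symbols(), g.unused_terminals())   # [] []
print(g.compute_first()['expr'])             # ['NUM']
print(g.compute_follow()['expr'])            # ['$end', 'PLUS']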
+ # + # ----------------------------------------------------------------------------- + + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' + if term in self.Precedence: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: + raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") + self.Precedence[term] = (assoc, level) + + # ----------------------------------------------------------------------------- + # add_production() + # + # Given an action function, this function assembles a production rule and + # computes its precedence level. + # + # The production rule is supplied as a list of symbols. For example, + # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and + # symbols ['expr','PLUS','term']. + # + # Precedence is determined by the precedence of the right-most non-terminal + # or the precedence of a terminal specified by %prec. + # + # A variety of error checks are performed to make sure production symbols + # are valid and that %prec is used correctly. + # ----------------------------------------------------------------------------- + + def add_production(self, prodname, syms, func=None, file='', line=0): + + if prodname in self.Terminals: + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) + if prodname == 'error': + raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) + if not _is_identifier.match(prodname): + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) + + # Look for literal tokens + for n, s in enumerate(syms): + if s[0] in "'\"": + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass + if not _is_identifier.match(s) and s != '%prec': + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + + # Determine the precedence level + if '%prec' in syms: + if syms[-1] == '%prec': + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) + if syms[-2] != '%prec': + raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % + (file, line)) + precname = syms[-1] + prodprec = self.Precedence.get(precname) + if not prodprec: + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) + else: + self.UsedPrecedence.add(precname) + del syms[-2:] # Drop %prec from the rule + else: + # If no %prec, precedence is determined by the rightmost terminal symbol + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + + # See if the rule is already in the rulemap + map = '%s -> %s' % (prodname, syms) + if map in self.Prodmap: + m = self.Prodmap[map] + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) + + # From this point on, everything is valid. 
Create a new Production instance + pnumber = len(self.Productions) + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] + + # Add the production number to Terminals and Nonterminals + for t in syms: + if t in self.Terminals: + self.Terminals[t].append(pnumber) + else: + if t not in self.Nonterminals: + self.Nonterminals[t] = [] + self.Nonterminals[t].append(pnumber) + + # Create a production and add it to the list of productions + p = Production(pnumber, prodname, syms, prodprec, func, file, line) + self.Productions.append(p) + self.Prodmap[map] = p + + # Add to the global productions list + try: + self.Prodnames[prodname].append(p) + except KeyError: + self.Prodnames[prodname] = [p] + + # ----------------------------------------------------------------------------- + # set_start() + # + # Sets the starting symbol and creates the augmented grammar. Production + # rule 0 is S' -> start where start is the start symbol. + # ----------------------------------------------------------------------------- + + def set_start(self, start=None): + if not start: + start = self.Productions[1].name + if start not in self.Nonterminals: + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) + self.Nonterminals[start].append(0) + self.Start = start + + # ----------------------------------------------------------------------------- + # find_unreachable() + # + # Find all of the nonterminal symbols that can't be reached from the starting + # symbol. Returns a list of nonterminals that can't be reached. + # ----------------------------------------------------------------------------- + + def find_unreachable(self): + + # Mark all symbols that are reachable from a symbol s + def mark_reachable_from(s): + if s in reachable: + return + reachable.add(s) + for p in self.Prodnames.get(s, []): + for r in p.prod: + mark_reachable_from(r) + + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] + + # ----------------------------------------------------------------------------- + # infinite_cycles() + # + # This function looks at the various parsing rules and tries to detect + # infinite recursion cycles (grammar rules where there is no possible way + # to derive a string of only terminals). + # ----------------------------------------------------------------------------- + + def infinite_cycles(self): + terminates = {} + + # Terminals: + for t in self.Terminals: + terminates[t] = True + + terminates['$end'] = True + + # Nonterminals: + + # Initialize to false: + for n in self.Nonterminals: + terminates[n] = False + + # Then propagate termination until no change: + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = False + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = True + + if p_terminates: + # symbol n terminates! + if not terminates[n]: + terminates[n] = True + some_change = True + # Don't need to consider any more productions for this n. 
+ break + + if not some_change: + break + + infinite = [] + for (s, term) in terminates.items(): + if not term: + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + infinite.append(s) + + return infinite + + # ----------------------------------------------------------------------------- + # undefined_symbols() + # + # Find all symbols that were used the grammar, but not defined as tokens or + # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol + # and prod is the production where the symbol was used. + # ----------------------------------------------------------------------------- + def undefined_symbols(self): + result = [] + for p in self.Productions: + if not p: + continue + + for s in p.prod: + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) + return result + + # ----------------------------------------------------------------------------- + # unused_terminals() + # + # Find all terminals that were defined, but not used by the grammar. Returns + # a list of all symbols. + # ----------------------------------------------------------------------------- + def unused_terminals(self): + unused_tok = [] + for s, v in self.Terminals.items(): + if s != 'error' and not v: + unused_tok.append(s) + + return unused_tok + + # ------------------------------------------------------------------------------ + # unused_rules() + # + # Find all grammar rules that were defined, but not used (maybe not reachable) + # Returns a list of productions. + # ------------------------------------------------------------------------------ + + def unused_rules(self): + unused_prod = [] + for s, v in self.Nonterminals.items(): + if not v: + p = self.Prodnames[s][0] + unused_prod.append(p) + return unused_prod + + # ----------------------------------------------------------------------------- + # unused_precedence() + # + # Returns a list of tuples (term,precedence) corresponding to precedence + # rules that were never used by the grammar. term is the name of the terminal + # on which precedence was applied and precedence is a string such as 'left' or + # 'right' corresponding to the type of precedence. + # ----------------------------------------------------------------------------- + + def unused_precedence(self): + unused = [] + for termname in self.Precedence: + if not (termname in self.Terminals or termname in self.UsedPrecedence): + unused.append((termname, self.Precedence[termname][0])) + + return unused + + # ------------------------------------------------------------------------- + # _first() + # + # Compute the value of FIRST1(beta) where beta is a tuple of symbols. + # + # During execution of compute_first1, the result may be incomplete. + # Afterward (e.g., when called from compute_follow()), it will be complete. + # ------------------------------------------------------------------------- + def _first(self, beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [] + for x in beta: + x_produces_empty = False + + # Add all the non- symbols of First[x] to the result. + for f in self.First[x]: + if f == '': + x_produces_empty = True + else: + if f not in result: + result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. 
+ break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('') + + return result + + # ------------------------------------------------------------------------- + # compute_first() + # + # Compute the value of FIRST1(X) for all symbols + # ------------------------------------------------------------------------- + def compute_first(self): + if self.First: + return self.First + + # Terminals: + for t in self.Terminals: + self.First[t] = [t] + + self.First['$end'] = ['$end'] + + # Nonterminals: + + # Initialize to the empty set: + for n in self.Nonterminals: + self.First[n] = [] + + # Then propagate symbols until no change: + while True: + some_change = False + for n in self.Nonterminals: + for p in self.Prodnames[n]: + for f in self._first(p.prod): + if f not in self.First[n]: + self.First[n].append(f) + some_change = True + if not some_change: + break + + return self.First + + # --------------------------------------------------------------------- + # compute_follow() + # + # Computes all of the follow sets for every non-terminal symbol. The + # follow set is the set of all symbols that might follow a given + # non-terminal. See the Dragon book, 2nd Ed. p. 189. + # --------------------------------------------------------------------- + def compute_follow(self, start=None): + # If already computed, return the result + if self.Follow: + return self.Follow + + # If first sets not computed yet, do that first. + if not self.First: + self.compute_first() + + # Add '$end' to the follow list of the start symbol + for k in self.Nonterminals: + self.Follow[k] = [] + + if not start: + start = self.Productions[1].name + + self.Follow[start] = ['$end'] + + while True: + didadd = False + for p in self.Productions[1:]: + # Here is the production set + for i, B in enumerate(p.prod): + if B in self.Nonterminals: + # Okay. We got a non-terminal in a production + fst = self._first(p.prod[i+1:]) + hasempty = False + for f in fst: + if f != '' and f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if f == '': + hasempty = True + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in self.Follow[p.name]: + if f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if not didadd: + break + return self.Follow + + + # ----------------------------------------------------------------------------- + # build_lritems() + # + # This function walks the list of productions and builds a complete set of the + # LR items. The LR items are stored in two ways: First, they are uniquely + # numbered and placed in the list _lritems. Second, a linked list of LR items + # is built for each production. For example: + # + # E -> E PLUS E + # + # Creates the list + # + # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . 
] + # ----------------------------------------------------------------------------- + + def build_lritems(self): + for p in self.Productions: + lastlri = p + i = 0 + lr_items = [] + while True: + if i > len(p): + lri = None + else: + lri = LRItem(p, i) + # Precompute the list of productions immediately following + try: + lri.lr_after = self.Prodnames[lri.prod[i+1]] + except (IndexError, KeyError): + lri.lr_after = [] + try: + lri.lr_before = lri.prod[i-1] + except IndexError: + lri.lr_before = None + + lastlri.lr_next = lri + if not lri: + break + lr_items.append(lri) + lastlri = lri + i += 1 + p.lr_items = lr_items + +# ----------------------------------------------------------------------------- +# == Class LRTable == +# +# This basic class represents a basic table of LR parsing information. +# Methods for generating the tables are not defined here. They are defined +# in the derived class LRGeneratedTable. +# ----------------------------------------------------------------------------- + +class VersionError(YaccError): + pass + +class LRTable(object): + def __init__(self): + self.lr_action = None + self.lr_goto = None + self.lr_productions = None + self.lr_method = None + + def read_table(self, module): + if isinstance(module, types.ModuleType): + parsetab = module + else: + exec('import %s' % module) + parsetab = sys.modules[module] + + if parsetab._tabversion != __tabversion__: + raise VersionError('yacc table file version is out of date') + + self.lr_action = parsetab._lr_action + self.lr_goto = parsetab._lr_goto + + self.lr_productions = [] + for p in parsetab._lr_productions: + self.lr_productions.append(MiniProduction(*p)) + + self.lr_method = parsetab._lr_method + return parsetab._lr_signature + + def read_pickle(self, filename): + try: + import cPickle as pickle + except ImportError: + import pickle + + if not os.path.exists(filename): + raise ImportError + + in_f = open(filename, 'rb') + + tabversion = pickle.load(in_f) + if tabversion != __tabversion__: + raise VersionError('yacc table file version is out of date') + self.lr_method = pickle.load(in_f) + signature = pickle.load(in_f) + self.lr_action = pickle.load(in_f) + self.lr_goto = pickle.load(in_f) + productions = pickle.load(in_f) + + self.lr_productions = [] + for p in productions: + self.lr_productions.append(MiniProduction(*p)) + + in_f.close() + return signature + + # Bind all production function names to callable objects in pdict + def bind_callables(self, pdict): + for p in self.lr_productions: + p.bind(pdict) + + +# ----------------------------------------------------------------------------- +# === LR Generator === +# +# The following classes and functions are used to generate LR parsing tables on +# a grammar. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# digraph() +# traverse() +# +# The following two functions are used to compute set valued functions +# of the form: +# +# F(x) = F'(x) U U{F(y) | x R y} +# +# This is used to compute the values of Read() sets as well as FOLLOW sets +# in LALR(1) generation. 
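A toy illustration of digraph()/traverse() with made-up data: each result set F(x) is the base set F'(x) extended with the base sets of everything reachable from x through R.

X = ['a', 'b', 'c']
edges = {'a': ['b'], 'b': ['c'], 'c': []}
base = {'a': ['1'], 'b': ['2'], 'c': ['3']}

R = lambda x: edges[x]           # the relation:  x R y  for each y in edges[x]
FP = lambda x: list(base[x])     # F'(x), copied so digraph() may extend it
print(digraph(X, R, FP))
# {'a': ['1', '2', '3'], 'b': ['2', '3'], 'c': ['3']}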
+# +# Inputs: X - An input set +# R - A relation +# FP - Set-valued function +# ------------------------------------------------------------------------------ + +def digraph(X, R, FP): + N = {} + for x in X: + N[x] = 0 + stack = [] + F = {} + for x in X: + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) + return F + +def traverse(x, N, stack, F, X, R, FP): + stack.append(x) + d = len(stack) + N[x] = d + F[x] = FP(x) # F(X) <- F'(x) + + rel = R(x) # Get y's related to x + for y in rel: + if N[y] == 0: + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) + if N[x] == d: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + +class LALRError(YaccError): + pass + +# ----------------------------------------------------------------------------- +# == LRGeneratedTable == +# +# This class implements the LR table generation algorithm. There are no +# public methods except for write() +# ----------------------------------------------------------------------------- + +class LRGeneratedTable(LRTable): + def __init__(self, grammar, method='LALR', log=None): + if method not in ['SLR', 'LALR']: + raise LALRError('Unsupported method %s' % method) + + self.grammar = grammar + self.lr_method = method + + # Set up the logger + if not log: + log = NullLogger() + self.log = log + + # Internal attributes + self.lr_action = {} # Action table + self.lr_goto = {} # Goto table + self.lr_productions = grammar.Productions # Copy of grammar Production array + self.lr_goto_cache = {} # Cache of computed gotos + self.lr0_cidhash = {} # Cache of closures + + self._add_count = 0 # Internal counter used to detect cycles + + # Diagonistic information filled in by the table generator + self.sr_conflict = 0 + self.rr_conflict = 0 + self.conflicts = [] # List of conflicts + + self.sr_conflicts = [] + self.rr_conflicts = [] + + # Build the tables + self.grammar.build_lritems() + self.grammar.compute_first() + self.grammar.compute_follow() + self.lr_parse_table() + + # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. + + def lr0_closure(self, I): + self._add_count += 1 + + # Add everything in I to J + J = I[:] + didadd = True + while didadd: + didadd = False + for j in J: + for x in j.lr_after: + if getattr(x, 'lr0_added', 0) == self._add_count: + continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = self._add_count + didadd = True + + return J + + # Compute the LR(0) goto function goto(I,X) where I is a set + # of LR(0) items and X is a grammar symbol. This function is written + # in a way that guarantees uniqueness of the generated goto sets + # (i.e. the same goto set will never be returned as two different Python + # objects). With uniqueness, we can later do fast set comparisons using + # id(obj) instead of element-wise comparison. 
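Pulling the pieces of this class together, a hedged end-to-end sketch (assumed import path, made-up grammar) that builds the tables and relies on the precedence-based shift/reduce resolution performed later in lr_parse_table():

# Assumed import path; the file's final module location is not shown in this diff.
from ply.yacc import Grammar, LRGeneratedTable

g = Grammar(['NUM', 'PLUS'])
g.set_precedence('PLUS', 'left', 1)          # resolves the ambiguity below
g.add_production('expr', ['expr', 'PLUS', 'expr'])
g.add_production('expr', ['NUM'])
g.set_start('expr')

tab = LRGeneratedTable(g, method='LALR')     # runs build_lritems/compute_first/compute_follow/lr_parse_table
print(len(tab.lr_action))                    # number of parser states
print(tab.lr_action[0].get('NUM'))           # positive entry: shift on NUM out of state 0
print(tab.sr_conflicts, tab.rr_conflicts)    # [] []  ('left' on PLUS resolved the conflict as a reduce)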
+ + def lr0_goto(self, I, x): + # First we look for a previously cached entry + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = self.lr_goto_cache.get(x) + if not s: + s = {} + self.lr_goto_cache[x] = s + + gs = [] + for p in I: + n = p.lr_next + if n and n.lr_before == x: + s1 = s.get(id(n)) + if not s1: + s1 = {} + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$end') + if not g: + if gs: + g = self.lr0_closure(gs) + s['$end'] = g + else: + s['$end'] = gs + self.lr_goto_cache[(id(I), x)] = g + return g + + # Compute the LR(0) sets of item function + def lr0_items(self): + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] + i = 0 + for I in C: + self.lr0_cidhash[id(I)] = i + i += 1 + + # Loop over the items in C and each grammar symbols + i = 0 + while i < len(C): + I = C[i] + i += 1 + + # Collect all of the symbols that could possibly be in the goto(I,X) sets + asyms = {} + for ii in I: + for s in ii.usyms: + asyms[s] = None + + for x in asyms: + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue + self.lr0_cidhash[id(g)] = len(C) + C.append(g) + + return C + + # ----------------------------------------------------------------------------- + # ==== LALR(1) Parsing ==== + # + # LALR(1) parsing is almost exactly the same as SLR except that instead of + # relying upon Follow() sets when performing reductions, a more selective + # lookahead set that incorporates the state of the LR(0) machine is utilized. + # Thus, we mainly just have to focus on calculating the lookahead sets. + # + # The method used here is due to DeRemer and Pennelo (1982). + # + # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) + # Lookahead Sets", ACM Transactions on Programming Languages and Systems, + # Vol. 4, No. 4, Oct. 1982, pp. 615-649 + # + # Further details can also be found in: + # + # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", + # McGraw-Hill Book Company, (1985). + # + # ----------------------------------------------------------------------------- + + # ----------------------------------------------------------------------------- + # compute_nullable_nonterminals() + # + # Creates a dictionary containing all of the non-terminals that might produce + # an empty production. + # ----------------------------------------------------------------------------- + + def compute_nullable_nonterminals(self): + nullable = set() + num_nullable = 0 + while True: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable.add(p.name) + continue + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) + return nullable + + # ----------------------------------------------------------------------------- + # find_nonterminal_trans(C) + # + # Given a set of LR(0) items, this functions finds all of the non-terminal + # transitions. These are transitions in which a dot appears immediately before + # a non-terminal. Returns a list of tuples of the form (state,N) where state + # is the state number and N is the nonterminal symbol. + # + # The input C is the set of LR(0) items. 
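A small hedged illustration of compute_nullable_nonterminals() with made-up symbols: a nonterminal that can produce an empty right hand side is reported as nullable.

# Assumed import path; the file's final module location is not shown in this diff.
from ply.yacc import Grammar, LRGeneratedTable

g = Grammar(['NUM', 'SEMI'])
g.add_production('stmt', ['NUM', 'opt_semi'])
g.add_production('opt_semi', ['SEMI'])
g.add_production('opt_semi', [])             # empty production
g.set_start('stmt')

tab = LRGeneratedTable(g, method='LALR')
print(tab.compute_nullable_nonterminals())   # {'opt_semi'}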
+ # ----------------------------------------------------------------------------- + + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans + + # ----------------------------------------------------------------------------- + # dr_relation() + # + # Computes the DR(p,A) relationships for non-terminal transitions. The input + # is a tuple (state,N) where state is a number and N is a nonterminal symbol. + # + # Returns a list of terminals. + # ----------------------------------------------------------------------------- + + def dr_relation(self, C, trans, nullable): + dr_set = {} + state, N = trans + terms = [] + + g = self.lr0_goto(C[state], N) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) + + # This extra bit is to handle the start state + if state == 0 and N == self.grammar.Productions[0].prod[0]: + terms.append('$end') + + return terms + + # ----------------------------------------------------------------------------- + # reads_relation() + # + # Computes the READS() relation (p,A) READS (t,C). + # ----------------------------------------------------------------------------- + + def reads_relation(self, C, trans, empty): + # Look for empty transitions + rel = [] + state, N = trans + + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) + + return rel + + # ----------------------------------------------------------------------------- + # compute_lookback_includes() + # + # Determines the lookback and includes relations + # + # LOOKBACK: + # + # This relation is determined by running the LR(0) state machine forward. + # For example, starting with a production "N : . A B C", we run it forward + # to obtain "N : A B C ." We then build a relationship between this final + # state and the starting state. These relationships are stored in a dictionary + # lookdict. + # + # INCLUDES: + # + # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). + # + # This relation is used to determine non-terminal transitions that occur + # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) + # if the following holds: + # + # B -> LAT, where T -> epsilon and p' -L-> p + # + # L is essentially a prefix (which may be empty), T is a suffix that must be + # able to derive an empty string. State p' must lead to state p with the string L. + # + # ----------------------------------------------------------------------------- + + def compute_lookback_includes(self, C, trans, nullable): + lookdict = {} # Dictionary of lookback relations + includedict = {} # Dictionary of include relations + + # Make a dictionary of non-terminal transitions + dtrans = {} + for t in trans: + dtrans[t] = 1 + + # Loop over all transitions and compute lookbacks and includes + for state, N in trans: + lookb = [] + includes = [] + for p in C[state]: + if p.name != N: + continue + + # Okay, we have a name match. We now follow the production all the way + # through the state machine until we get the . 
on the right hand side + + lr_index = p.lr_index + j = state + while lr_index < p.len - 1: + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state + + # When we get here, j is the final state, now we have to locate the production + for r in C[j]: + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) + for i in includes: + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb + + return lookdict, includedict + + # ----------------------------------------------------------------------------- + # compute_read_sets() + # + # Given a set of LR(0) items, this function computes the read sets. + # + # Inputs: C = Set of LR(0) items + # ntrans = Set of nonterminal transitions + # nullable = Set of empty transitions + # + # Returns a set containing the read sets + # ----------------------------------------------------------------------------- + + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) + return F + + # ----------------------------------------------------------------------------- + # compute_follow_sets() + # + # Given a set of LR(0) items, a set of non-terminal transitions, a readset, + # and an include set, this function computes the follow sets + # + # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} + # + # Inputs: + # ntrans = Set of nonterminal transitions + # readsets = Readset (previously computed) + # inclsets = Include sets (previously computed) + # + # Returns a set containing the follow sets + # ----------------------------------------------------------------------------- + + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F + + # ----------------------------------------------------------------------------- + # add_lookaheads() + # + # Attaches the lookahead symbols to grammar rules. 
+ # + # Inputs: lookbacks - Set of lookback relations + # followset - Computed follow set + # + # This function directly attaches the lookaheads to productions contained + # in the lookbacks set + # ----------------------------------------------------------------------------- + + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): + # Loop over productions in lookback + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) + + # ----------------------------------------------------------------------------- + # add_lalr_lookaheads() + # + # This function does all of the work of adding lookahead information for use + # with LALR parsing + # ----------------------------------------------------------------------------- + + def add_lalr_lookaheads(self, C): + # Determine all of the nullable nonterminals + nullable = self.compute_nullable_nonterminals() + + # Find all non-terminal transitions + trans = self.find_nonterminal_transitions(C) + + # Compute read sets + readsets = self.compute_read_sets(C, trans, nullable) + + # Compute lookback/includes relations + lookd, included = self.compute_lookback_includes(C, trans, nullable) + + # Compute LALR FOLLOW sets + followsets = self.compute_follow_sets(trans, readsets, included) + + # Add all of the lookaheads + self.add_lookaheads(lookd, followsets) + + # ----------------------------------------------------------------------------- + # lr_parse_table() + # + # This function constructs the parse tables for SLR or LALR + # ----------------------------------------------------------------------------- + def lr_parse_table(self): + Productions = self.grammar.Productions + Precedence = self.grammar.Precedence + goto = self.lr_goto # Goto array + action = self.lr_action # Action array + log = self.log # Logger for output + + actionp = {} # Action production array (temporary) + + log.info('Parsing method: %s', self.lr_method) + + # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items + # This determines the number of states + + C = self.lr0_items() + + if self.lr_method == 'LALR': + self.add_lalr_lookaheads(C) + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') + for p in I: + log.info(' (%d) %s', p.number, p) + log.info('') + + for p in I: + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + # We are at the end of a production. Reduce! + if self.lr_method == 'LALR': + laheads = p.lookaheads[st] + else: + laheads = self.grammar.Follow[p.name] + for a in laheads: + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + sprec, slevel = Productions[st_actionp[a].number].prec + rprec, rlevel = Precedence.get(a, ('right', 0)) + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(' ! 
shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp, rejectp = pp, oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 + else: + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) + else: + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + # We are in a shift state + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError('Shift/shift conflict in state %d' % st) + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + rprec, rlevel = Productions[st_actionp[a].number].prec + sprec, slevel = Precedence.get(a, ('right', 0)) + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + + else: + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = j + st_actionp[a] = p + + # Print the actions associated with each terminal + _actprint = {} + for a, p, m in actlist: + if a in st_action: + if p is st_actionp[a]: + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') + # Print the actions that were not used. (debugging) + not_used = 0 + for a, p, m in actlist: + if a in st_action: + if p is not st_actionp[a]: + if not (a, m) in _actprint: + log.debug(' ! 
%-15s [ %s ]', a, m) + not_used = 1 + _actprint[(a, m)] = 1 + if not_used: + log.debug('') + + # Construct the goto table for this state + + nkeys = {} + for ii in I: + for s in ii.usyms: + if s in self.grammar.Nonterminals: + nkeys[s] = None + for n in nkeys: + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + st_goto[n] = j + log.info(' %-30s shift and go to state %d', n, j) + + action[st] = st_action + actionp[st] = st_actionp + goto[st] = st_goto + st += 1 + + # ----------------------------------------------------------------------------- + # write() + # + # This function writes the LR parsing tables to a file + # ----------------------------------------------------------------------------- + + def write_table(self, tabmodule, outputdir='', signature=''): + if isinstance(tabmodule, types.ModuleType): + raise IOError("Won't overwrite existing tabmodule") + + basemodulename = tabmodule.split('.')[-1] + filename = os.path.join(outputdir, basemodulename) + '.py' + try: + f = open(filename, 'w') + + f.write(''' +# %s +# This file is automatically generated. Do not edit. +_tabversion = %r + +_lr_method = %r + +_lr_signature = %r + ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) + + # Change smaller to 0 to go back to original tables + smaller = 1 + + # Factor out names to try and make smaller + if smaller: + items = {} + + for s, nd in self.lr_action.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write('\n_lr_action_items = {') + for k, v in items.items(): + f.write('%r:([' % k) + for i in v[0]: + f.write('%r,' % i) + f.write('],[') + for i in v[1]: + f.write('%r,' % i) + + f.write(']),') + f.write('}\n') + + f.write(''' +_lr_action = {} +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = {} + _lr_action[_x][_k] = _y +del _lr_action_items +''') + + else: + f.write('\n_lr_action = { ') + for k, v in self.lr_action.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') + + if smaller: + # Factor out names to try and make smaller + items = {} + + for s, nd in self.lr_goto.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write('\n_lr_goto_items = {') + for k, v in items.items(): + f.write('%r:([' % k) + for i in v[0]: + f.write('%r,' % i) + f.write('],[') + for i in v[1]: + f.write('%r,' % i) + + f.write(']),') + f.write('}\n') + + f.write(''' +_lr_goto = {} +for _k, _v in _lr_goto_items.items(): + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y +del _lr_goto_items +''') + else: + f.write('\n_lr_goto = { ') + for k, v in self.lr_goto.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') + + # Write production table + f.write('_lr_productions = [\n') + for p in self.lr_productions: + if p.func: + f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, + p.func, os.path.basename(p.file), p.line)) + else: + f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) + f.write(']\n') + f.close() + + except IOError as e: + raise + + + # ----------------------------------------------------------------------------- + # pickle_table() + # + # This function pickles the LR parsing tables to a supplied file object + # ----------------------------------------------------------------------------- + + def 
pickle_table(self, filename, signature=''): + try: + import cPickle as pickle + except ImportError: + import pickle + with open(filename, 'wb') as outf: + pickle.dump(__tabversion__, outf, pickle_protocol) + pickle.dump(self.lr_method, outf, pickle_protocol) + pickle.dump(signature, outf, pickle_protocol) + pickle.dump(self.lr_action, outf, pickle_protocol) + pickle.dump(self.lr_goto, outf, pickle_protocol) + + outp = [] + for p in self.lr_productions: + if p.func: + outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) + else: + outp.append((str(p), p.name, p.len, None, None, None)) + pickle.dump(outp, outf, pickle_protocol) + +# ----------------------------------------------------------------------------- +# === INTROSPECTION === +# +# The following functions and classes are used to implement the PLY +# introspection features followed by the yacc() function itself. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- + +def get_caller_module_dict(levels): + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict + +# ----------------------------------------------------------------------------- +# parse_grammar() +# +# This takes a raw grammar rule string and parses it into production data +# ----------------------------------------------------------------------------- +def parse_grammar(doc, file, line): + grammar = [] + # Split the doc string into lines + pstrings = doc.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: + continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) + prodname = lastp + syms = p[1:] + else: + prodname = p[0] + lastp = prodname + syms = p[2:] + assign = p[1] + if assign != ':' and assign != '::=': + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) + + grammar.append((file, dline, prodname, syms)) + except SyntaxError: + raise + except Exception: + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) + + return grammar + +# ----------------------------------------------------------------------------- +# ParserReflect() +# +# This class represents information extracted for building a parser including +# start symbol, error function, tokens, precedence list, action functions, +# etc. 
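+# All of this is gathered purely by naming convention from the supplied module
+# dictionary (pdict): p_rulename functions, the 'tokens' list, the 'precedence'
+# table, the 'start' symbol and an optional p_error handler.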
+# ----------------------------------------------------------------------------- +class ParserReflect(object): + def __init__(self, pdict, log=None): + self.pdict = pdict + self.start = None + self.error_func = None + self.tokens = None + self.modules = set() + self.grammar = [] + self.error = False + + if log is None: + self.log = PlyLogger(sys.stderr) + else: + self.log = log + + # Get all of the basic information + def get_all(self): + self.get_start() + self.get_error_func() + self.get_tokens() + self.get_precedence() + self.get_pfunctions() + + # Validate all of the information + def validate_all(self): + self.validate_start() + self.validate_error_func() + self.validate_tokens() + self.validate_precedence() + self.validate_pfunctions() + self.validate_modules() + return self.error + + # Compute a signature over the grammar + def signature(self): + try: + from hashlib import md5 + except ImportError: + from md5 import md5 + try: + sig = md5() + if self.start: + sig.update(self.start.encode('latin-1')) + if self.prec: + sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1')) + if self.tokens: + sig.update(' '.join(self.tokens).encode('latin-1')) + for f in self.pfuncs: + if f[3]: + sig.update(f[3].encode('latin-1')) + except (TypeError, ValueError): + pass + + digest = base64.b16encode(sig.digest()) + if sys.version_info[0] >= 3: + digest = digest.decode('latin-1') + return digest + + # ----------------------------------------------------------------------------- + # validate_modules() + # + # This method checks to see if there are duplicated p_rulename() functions + # in the parser module file. Without this function, it is really easy for + # users to make mistakes by cutting and pasting code fragments (and it's a real + # bugger to try and figure out why the resulting parser doesn't work). Therefore, + # we just do a little regular expression pattern matching of def statements + # to try and detect duplicates. + # ----------------------------------------------------------------------------- + + def validate_modules(self): + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + + for module in self.modules: + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + continue + + counthash = {} + for linen, line in enumerate(lines): + linen += 1 + m = fre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. 
Previously defined on line %d', + filename, linen, name, prev) + + # Get the start symbol + def get_start(self): + self.start = self.pdict.get('start') + + # Validate the start symbol + def validate_start(self): + if self.start is not None: + if not isinstance(self.start, string_types): + self.log.error("'start' must be a string") + + # Look for error handler + def get_error_func(self): + self.error_func = self.pdict.get('p_error') + + # Validate the error function + def validate_error_func(self): + if self.error_func: + if isinstance(self.error_func, types.FunctionType): + ismethod = 0 + elif isinstance(self.error_func, types.MethodType): + ismethod = 1 + else: + self.log.error("'p_error' defined, but is not a function or method") + self.error = True + return + + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) + + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True + + # Get the tokens map + def get_tokens(self): + tokens = self.pdict.get('tokens') + if not tokens: + self.log.error('No token list is defined') + self.error = True + return + + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = tokens + + # Validate the tokens + def validate_tokens(self): + # Validate the tokens. + if 'error' in self.tokens: + self.log.error("Illegal token name 'error'. Is a reserved word") + self.error = True + return + + terminals = set() + for n in self.tokens: + if n in terminals: + self.log.warning('Token %r multiply defined', n) + terminals.add(n) + + # Get the precedence map (if any) + def get_precedence(self): + self.prec = self.pdict.get('precedence') + + # Validate and parse the precedence map + def validate_precedence(self): + preclist = [] + if self.prec: + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True + return + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True + return + + if len(p) < 2: + self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) + self.error = True + return + assoc = p[0] + if not isinstance(assoc, string_types): + self.log.error('precedence associativity must be a string') + self.error = True + return + for term in p[1:]: + if not isinstance(term, string_types): + self.log.error('precedence items must be strings') + self.error = True + return + preclist.append((term, assoc, level+1)) + self.preclist = preclist + + # Get all p_functions from the grammar + def get_pfunctions(self): + p_functions = [] + for name, item in self.pdict.items(): + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) + self.pfuncs = p_functions + + # Validate all of the p_functions + def validate_pfunctions(self): + grammar = [] + # Check for non-empty symbols + if len(self.pfuncs) == 0: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) + func = self.pdict[name] + if isinstance(func, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True + elif not func.__doc__: + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) + else: + try: + parsed_g = parse_grammar(doc, file, line) + for g in parsed_g: + grammar.append((name, g)) + except SyntaxError as e: + self.log.error(str(e)) + self.error = True + + # Looks like a valid grammar rule + # Mark the file in which defined. + self.modules.add(module) + + # Secondary validation step that looks for p_ definitions that are not functions + # or functions that look like they might be grammar rules. 
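+        # (for example a p_xxx name bound to something that is not a function,
+        # or a rule-like function whose docstring reads 'name : ...' but whose
+        # name lacks the p_ prefix)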
+ + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass + + self.grammar = grammar + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build a parser +# ----------------------------------------------------------------------------- + +def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, + check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, + outputdir=None, debuglog=None, errorlog=None, picklefile=None): + + if tabmodule is None: + tabmodule = tab_module + + # Reference to the parsing method of the last built parser + global parse + + # If pickling is enabled, table files are not created + if picklefile: + write_tables = 0 + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the parser + if module: + _items = [(k, getattr(module, k)) for k in dir(module)] + pdict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + else: + pdict = get_caller_module_dict(2) + + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If tabmodule specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(tabmodule, types.ModuleType): + srcfile = tabmodule.__file__ + else: + if '.' not in tabmodule: + srcfile = pdict['__file__'] + else: + parts = tabmodule.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = pdict.get('__package__') + if pkg and isinstance(tabmodule, str): + if '.' not in tabmodule: + tabmodule = pkg + '.' 
+ tabmodule + + + + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + + # Collect parser information from the dictionary + pinfo = ParserReflect(pdict, log=errorlog) + pinfo.get_all() + + if pinfo.error: + raise YaccError('Unable to build parser') + + # Check signature against table files (if any) + signature = pinfo.signature() + + # Read the tables + try: + lr = LRTable() + if picklefile: + read_signature = lr.read_pickle(picklefile) + else: + read_signature = lr.read_table(tabmodule) + if optimize or (read_signature == signature): + try: + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr, pinfo.error_func) + parse = parser.parse + return parser + except Exception as e: + errorlog.warning('There was a problem loading the table file: %r', e) + except VersionError as e: + errorlog.warning(str(e)) + except ImportError: + pass + + if debuglog is None: + if debug: + try: + debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) + debuglog = NullLogger() + else: + debuglog = NullLogger() + + debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__) + + errors = False + + # Validate the parser information + if pinfo.validate_all(): + raise YaccError('Unable to build parser') + + if not pinfo.error_func: + errorlog.warning('no p_error() function is defined') + + # Create a grammar object + grammar = Grammar(pinfo.tokens) + + # Set precedence level for terminals + for term, assoc, level in pinfo.preclist: + try: + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) + + # Add productions to the grammar + for funcname, gram in pinfo.grammar: + file, line, prodname, syms = gram + try: + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True + + # Set the grammar start symbols + try: + if start is None: + grammar.set_start(pinfo.start) + else: + grammar.set_start(start) + except GrammarError as e: + errorlog.error(str(e)) + errors = True + + if errors: + raise YaccError('Unable to build parser') + + # Verify the grammar structure + undefined_symbols = grammar.undefined_symbols() + for sym, prod in undefined_symbols: + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True + + unused_terminals = grammar.unused_terminals() + if unused_terminals: + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') + for term in unused_terminals: + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) + + # Print out all productions to the debug log + if debug: + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) + + # Find unused non-terminals + unused_rules = grammar.unused_rules() + for prod in unused_rules: + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) + + if len(unused_terminals) == 1: + errorlog.warning('There is 1 unused token') + if len(unused_terminals) > 1: + errorlog.warning('There are %d unused tokens', len(unused_terminals)) + + if len(unused_rules) == 1: + errorlog.warning('There is 1 unused rule') + if len(unused_rules) > 1: + errorlog.warning('There are %d unused rules', len(unused_rules)) + + if debug: + debuglog.info('') + 
debuglog.info('Terminals, with rules where they appear') + debuglog.info('') + terms = list(grammar.Terminals) + terms.sort() + for term in terms: + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') + nonterms = list(grammar.Nonterminals) + nonterms.sort() + for nonterm in nonterms: + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') + + if check_recursion: + unreachable = grammar.find_unreachable() + for u in unreachable: + errorlog.warning('Symbol %r is unreachable', u) + + infinite = grammar.infinite_cycles() + for inf in infinite: + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + + unused_prec = grammar.unused_precedence() + for term, assoc in unused_prec: + errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) + errors = True + + if errors: + raise YaccError('Unable to build parser') + + # Run the LRGeneratedTable on the grammar + if debug: + errorlog.debug('Generating %s tables', method) + + lr = LRGeneratedTable(grammar, method, debuglog) + + if debug: + num_sr = len(lr.sr_conflicts) + + # Report shift/reduce and reduce/reduce conflicts + if num_sr == 1: + errorlog.warning('1 shift/reduce conflict') + elif num_sr > 1: + errorlog.warning('%d shift/reduce conflicts', num_sr) + + num_rr = len(lr.rr_conflicts) + if num_rr == 1: + errorlog.warning('1 reduce/reduce conflict') + elif num_rr > 1: + errorlog.warning('%d reduce/reduce conflicts', num_rr) + + # Write out conflicts to the output file + if debug and (lr.sr_conflicts or lr.rr_conflicts): + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') + + for state, tok, resolution in lr.sr_conflicts: + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() + for state, rule, rejected in lr.rr_conflicts: + if (state, id(rule), id(rejected)) in already_reported: + continue + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) + + warned_never = [] + for state, rule, rejected in lr.rr_conflicts: + if not rejected.reduced and (rejected not in warned_never): + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) + warned_never.append(rejected) + + # Write the table file if requested + if write_tables: + try: + lr.write_table(tabmodule, outputdir, signature) + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) + + # Write a pickled version of the tables + if picklefile: + try: + lr.pickle_table(picklefile, signature) + except IOError as e: + errorlog.warning("Couldn't create %r. 
%s" % (picklefile, e)) + + # Build the parser + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr, pinfo.error_func) + + parse = parser.parse + return parser diff --git a/ply/ygen.py b/ply/ygen.py new file mode 100644 index 0000000..acf5ca1 --- /dev/null +++ b/ply/ygen.py @@ -0,0 +1,74 @@ +# ply: ygen.py +# +# This is a support program that auto-generates different versions of the YACC parsing +# function with different features removed for the purposes of performance. +# +# Users should edit the method LParser.parsedebug() in yacc.py. The source code +# for that method is then used to create the other methods. See the comments in +# yacc.py for further details. + +import os.path +import shutil + +def get_source_range(lines, tag): + srclines = enumerate(lines) + start_tag = '#--! %s-start' % tag + end_tag = '#--! %s-end' % tag + + for start_index, line in srclines: + if line.strip().startswith(start_tag): + break + + for end_index, line in srclines: + if line.strip().endswith(end_tag): + break + + return (start_index + 1, end_index) + +def filter_section(lines, tag): + filtered_lines = [] + include = True + tag_text = '#--! %s' % tag + for line in lines: + if line.strip().startswith(tag_text): + include = not include + elif include: + filtered_lines.append(line) + return filtered_lines + +def main(): + dirname = os.path.dirname(__file__) + shutil.copy2(os.path.join(dirname, 'yacc.py'), os.path.join(dirname, 'yacc.py.bak')) + with open(os.path.join(dirname, 'yacc.py'), 'r') as f: + lines = f.readlines() + + parse_start, parse_end = get_source_range(lines, 'parsedebug') + parseopt_start, parseopt_end = get_source_range(lines, 'parseopt') + parseopt_notrack_start, parseopt_notrack_end = get_source_range(lines, 'parseopt-notrack') + + # Get the original source + orig_lines = lines[parse_start:parse_end] + + # Filter the DEBUG sections out + parseopt_lines = filter_section(orig_lines, 'DEBUG') + + # Filter the TRACKING sections out + parseopt_notrack_lines = filter_section(parseopt_lines, 'TRACKING') + + # Replace the parser source sections with updated versions + lines[parseopt_notrack_start:parseopt_notrack_end] = parseopt_notrack_lines + lines[parseopt_start:parseopt_end] = parseopt_lines + + lines = [line.rstrip()+'\n' for line in lines] + with open(os.path.join(dirname, 'yacc.py'), 'w') as f: + f.writelines(lines) + + print('Updated yacc.py') + +if __name__ == '__main__': + main() + + + + + diff --git a/tests/ServiceTools.idl b/tests/ServiceTools.idl new file mode 100644 index 0000000..2134a4b --- /dev/null +++ b/tests/ServiceTools.idl @@ -0,0 +1,91 @@ +/** + * + */ + +const long SERV_ITEM_NB = 127; +const long ITEM_SERVICE_SIZE = 30; +const long ITEM_VALUE_SIZE = 8; + +const long SERV_STAT_ERROR = -1; +const long SERV_STAT_NO_ERROR = 0; + +const long SWL_NO_ERROR = 0; + + + +typedef octet byte; +typedef byte g_swl_status; + +struct g_service_item +{ + char service [ITEM_SERVICE_SIZE] ; + long value ; + long id; +} ; + +struct t_service +{ + long format_version ; + long key_type; + unsigned long serial_nb; + long item_nb; + g_service_item serv_item[SERV_ITEM_NB]; +} ; + +struct g_feature_item +{ + char feature [ITEM_SERVICE_SIZE] ; + long id ; +} ; + +struct t_model_feat +{ + long item_nb; + g_feature_item feat_item[SERV_ITEM_NB]; +} ; + +/** + * This is a test attempt + */ +[uuid(ca33129a-38a6-41f3-9608-8aba1ca4d0a0)] +local interface ServiceTools +{ + /** + * @param filename is declared with type out in order to get a pointer + */ + g_swl_status servSparse_service( out char filename 
+ , inout t_service serv + , in long size + , in t_model_feat model_feature); + + long servIDvalue( out char index + , in t_model_feat model_feature); + + long servStrvalue( in long id + , out char index + , in t_model_feat model_feature); + + g_swl_status servSservice_file( out char filename + , inout t_service parsed); + + long servSfind_item( out char item_name + , inout t_service parsed + , in long start_index); + + long servIitem_value( in long index, inout t_service parsed); + + /** + * Issue with fserv_file and item_name. Must be null terminated string + */ + g_swl_status servSfind_service_in_file( out char fserv_file + , out char item_name + , out long value); + + g_swl_status servSis_internet_services( out char fserv_file, out long value); + + g_swl_status servSis_voip_services( out char fserv_file, out long value); + + g_swl_status servSis_ras_services( out char fserv_file, out long value); + + g_swl_status servSis_acd_services( out char fserv_file, out long value); +}; diff --git a/tests/test.idl b/tests/test.idl new file mode 100644 index 0000000..6edd512 --- /dev/null +++ b/tests/test.idl @@ -0,0 +1,89 @@ +/** + + */ +%{C++ + +#include +%} + +native MAX(Max) ; +/* This fails + +*/ +const long MAX_ARRAY = 10; + +interface IInstantMessaging; +typedef long IMResultCode; +typedef long CstaResultCode; +typedef octet byte; +typedef short word; + +[uuid(a908d51f-cc44-4053-af1e-8c72ce2ca7ec)] +interface IToto { + /* Comment seems to be required*/ + const long v1 = 10; + byte testchar(out char file_serv, in char oc); +}; + +/** + * Main entry point for data ID's + */ +struct subs_short_info { + byte idx; + word toto; + word arr[MAX_ARRAY]; +} ; + +enum eValues {one , two , three } ; + +/** + Attempt to describe Instant Messaging Service In IDL Language + */ +[scriptable , uuid(317b15de-25a5-426b-80e0-446f455b7176)] +interface IInstantMessaging +{ + IMResultCode sendIm(in void toto); + + IMResultCode getImMessages(in long test); + + IMResultCode getImSummary(inout IMResultCode t); + + IMResultCode getImCounters(); + + IMResultCode updateIm(); + + IMResultCode replyIm(); + + IMResultCode deleteIm(); +}; + +[scriptable , uuid(317b15de-25a5-426b-80e0-446f455b7176)] +interface INative +{ + [noscript] long getUUID(in long nothin); +}; + +module M1 { + + interface IUnknown ; + + [ uuid(317b15de-25a5-426b-80e0-446f455b7177)] + interface ICsta : INative { + CstaResultCode makeCall(in long callId); + }; +}; + +component CAddressBook { + provides IInstantMessaging asMessaging; + provides M1::IUnknown asUnknown; + uses INative asNative; + /* Comment */ + [noscript] attribute long status; +}; + +home HomeAddressBook manages CAddressBook { + + factory ABFactory (); + factory ABCFactory (in long t); +}; +
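
For orientation, a minimal sketch of the module conventions that the yacc() entry
point above harvests through ParserReflect: a 'tokens' list, p_rulename functions
whose docstrings carry the grammar, an optional 'precedence' table and a p_error
handler. The token names and rules below are illustrative only and are not part of
this import; ply.lex is assumed to be available next to the imported ply/yacc.py.

    # calc_example.py -- hypothetical grammar module, for illustration only
    import ply.lex as lex
    import ply.yacc as yacc

    tokens = ('NUMBER', 'PLUS')            # collected by ParserReflect.get_tokens()

    t_PLUS = r'\+'
    t_ignore = ' \t'

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):
        t.lexer.skip(1)                    # skip characters the lexer cannot match

    def p_expr_plus(p):
        'expr : expr PLUS NUMBER'          # docstring is parsed by parse_grammar()
        p[0] = p[1] + p[3]

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(p):
        print('Syntax error at %r' % (p,))

    lexer = lex.lex()
    parser = yacc.yacc()                   # builds the LALR tables described above
    print(parser.parse('1+2+3', lexer=lexer))   # prints 6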