#----------------------------------------------------------------------
#   python2c.py                     joe@strout.net
#
#   This is a cheezy little program that converts some Python code
#   into C++ code.  It has a very limited range, but it does a good
#   job on the example code it was built around!  =)
#
#   first release: 3/21/97          JJS
#
#   Changes by Dirk Heise 01-12-98 (Thanks, Dirk!)
#   - created a very simple file interface
#   - some more rules for C++ support
#   - added another "action" that may contain a python statement to
#     be executed on match, so a match can trigger something
#   - create a class header
#   - temporarily buffer output in several lists of strings
#   - added DEVELOPING option to help in examining operation of rules
#----------------------------------------------------------------------

import regex
import string

try:
    import wmod     # dirk heise, for debug output - not a standard module
                    # supplies a function DEBUGWRITE(str), that's all
except ImportError:
    # The debug helper is optional: the only visible use of it in this
    # file is a commented-out DEBUGWRITE call, so do not refuse to start
    # just because it is not installed.
    wmod = None

DEVELOPING = 1  # set this to 0 for simple translation
                # set this to 1 if you want to have supplementary comments in the
                # generated C++ code that help finding out what PYTHON2C did

# dirk heise:
# ---------------------------- RULES ------------------------------
# All these rules work on single lines of input text !
# Every rule consists of three strings (the third may be None)
#   - regex
#       If you wanna create new rules:
#       the regex must contain one or more "\(.*\)" patterns,
#   - replacement rule
#       the replacement rule can refer to the patterns mentioned above
#       with "^1","^2", "^3" etc. to insert the substring swallowed by that subexpr.
#   - None or a Python statement to be executed on match.
#       When this statement is executed, it can refer to the subexpressions
#       eaten by the regex as "sub[1]", "sub[2]" etc.
#       you can use this to store info from the parsed text into string
#       variables.
#       IMPORTANT : the line you're defining works in a local namespace;
#       it can NOT setup global variables in this program directly
#       (i suspect it might be a bug in Python1.4)
#       My workaround suggestion:
#       When you define such a line, simply call a function you define yourself!
#       That function (see SetClassName() below) can access every global object.
# The rules are applied from top to bottom of list!
# You can exploit this by first catching special cases and later
# catch more generalized cases! (In other words, the sequence order
# might be important)
#
# Pattern syntax: the patterns are compiled with the old "regex" module
# (Emacs-style syntax), where \( ... \) delimits a group and a bare
# ( or ) is a literal parenthesis.  They are written as raw strings so
# the backslashes reach the regex compiler untouched.
trans = [
    # COMMENTS
    # 0
    [r"\(.*\)#\(.*\)", "^1//^2", None],

    # STRING LITERALS
    # 1
    [r"\(.*\)'\(.*\)'\(.*\)", '^1"^2"^3', None],

    # WHILE LOOPS
    # 2
    [r"while \(.*\):\(.*\)", "while (^1)^2", None],

    # FOR LOOPS
    # loops that iterate integers can easily be converted:
    # 3
    [r"for \(.*\) in range(\(.*\),\(.*\)):\(.*\)",
     "for (int ^1=^2; ^1<^3; ^1++) {^4", None],
    # an attempt to make sense of loops that iterate over some sequence/list:
    # (rule sequence is important here as the following rule is a superset
    # of the last one!):
    # 4
    [r"for \(.*\) in \(.*\):\(.*\)",
     "for (int ^1i=0; ^1i<^2.Length(); ^1i++) { int ^1 = ^2[^1i]; ^3", None],
    # Here, i assume that a Python sequence is represented by some
    # C++ container, and this container offers a method Length()
    # to find out its number of elements.
    # While a Python loop does not need an int counter variable for
    # this, iterating a C++ dynamic array-like container requires
    # a counter int. And it requires accessing the container content
    # explicitly. This rule constructs some code for that.
    # Even if it doesn't compile, it'll notify you of the necessity of
    # explicit indirection, it's a thing easily overlooked.
    # TODO : replace Length() with something more flexible
    #        or define a complete container interface somewhere...

    # IF LINES
    # 5
    [r"if \(.*\):\(.*\)", "if (^1)^2", None],

    # ELSE LINES
    # 6
    [r"else:\(.*\)", "else^1", None],

    # PRINT LINES
    # 7  (the trailing $ is a real end-of-line anchor: "print x," form)
    [r"print \(.*\),$", "cout << ^1 << ' ';", None],
    # 8
    [r"print \(.*\)", "cout << ^1 << endl;", None],

    # INPUT STATEMENTS
    # 9
    [r'\(.*\)=\(.*\)raw_input("\(.*\)")\(.*\)',
     'cout << "^3"; cin >> ^1;^4', None],
    # 10
    [r"\(.*\)=\(.*\)raw_input(\(.*\))\(.*\)", "cin >> ^1;^4", None],
    # 11
    [r'\(.*\)=\(.*\)input("\(.*\)")\(.*\)',
     'cout << "^3"; cin >> ^1;^4', None],
    # 12
    [r"\(.*\)=\(.*\)input(\(.*\))\(.*\)", "cin >> ^1;^4", None],

    # C++ RULES
    # some more rules by dirk heise

    # MEMBER VARIABLE PREFIXES (TREATING "SELF.")
    # this is done by two rules, the sequence is important!
    # 13
    #[r"\(.*\)self\.\(\(\|[a-z]\|[A-Z]\)+\)(\(.*\)" , "^1^2(^4" , None],
    # this catches "self.id("
    # first catch function calls to the object itself, and simply kill
    # ".self"
    # TODO this regex fails... why? and find an easier way to catch
    # id char set
    [r"\(.*\)self\.\(.*\)", "^1m_^2", None],
    # catch the rest: member variable accesses
    # this rule assumes the C++ programmer has the habit of calling member variables
    # "m_something" (which is a habit of mine)
    # Change this rule to fit your personal C++ naming conventions!

    # CLASS DECLARATIONS
    [r"class \(.*\):", "", "SetClassName(sub[1])"],
    # assign the detected class name to a global string

    # FUNCTION & METHOD DECLARATIONS
    # first catch method declarations:
    [r"def \(.*\)(self):\(.*\)", "void ^c::^1()^2", None],
    [r"def \(.*\)(self,\(.*\)", "void ^c::^1(^2", None],
    # put classname in front of function name, eat parameter "self"
    # the "void" is just a guess, of course.
    # TODO : ^c for classname is quite arbitrary.
    #        Setting up "classname" is okay cause its a clean way of
    #        extending but ^c is built into the translate function and
    #        it shouldn't
    #
    # now catch normal function declarations (they have no "self" argument):
    [r"def \(.*\)", "void ^1", None],
    # again, the void is a guess.
]

# --------------------- EXTENSIONS --------------------------
# These variables and functions are used by user-defined python statements
# (see descriptions of rules)

header = []     # will store list of strings for class header
                # only used when a class definition is found


def hprint(s):
    # append string to class header text
    header.append(s)


classname = ""  # dirk heise
                # global variable to keep a detected class name


def SetClassName(s):    # dirk heise
    # set up class name , to be used in user executable statements
    # i suppose here that this function is called when a Python class
    # is defined.
    # So create some code that will work as a template for a header file
    #
    # s is the detected class name.  It is stored in the global
    # "classname" and an 18-line header template for it is appended to
    # the global list "header" via hprint().
    global classname
    classname = s
    hprint ("VERY ROUGH HEADER FILE TEMPLATE FOR CLASS "+classname)
    hprint ("copy this into its own file and refine it by hand."  )
    hprint ("// "+classname+".H"                                  )
    hprint ("//"                                                  )
    hprint ("//"                                                  )
    hprint ("#ifndef _"+classname+"_H_"                           )
    hprint ("#define _"+classname+"_H_"                           )
    hprint ('#include "globs.h"'                                  )
    hprint ("class "+classname                                    )
    hprint (" {"                                                  )
    hprint (" public:"                                            )
    hprint (" "+classname+"();"                                   )
    hprint (" virtual ~"+classname+"();"                          )
    hprint (" protected:"                                         )
    hprint (" private:"                                           )
    hprint (" };"                                                 )
    hprint ("#endif // _"+classname+"_H_"                         )
    hprint ("END OF HEADER FILE TEMPLATE FOR CLASS "+classname    )
    # TODO why all the mess with hprint? Well, the idea is to extend this
    #      one: First write only until destructor prototype, later
    #      when fetching a "def NAME(" "print" translation and "hprint"
    #      line as prototype (so this header file will contain
    #      more accurate info)
    #      In the end, "hprint" the rest of the header file.
# dirk heise: added parameter exe
def translate(s, keys, values, exe):
    # translate line s
    # find a match among keys
    # returns transformed "s" and a history string telling numbers of
    # transformations applied, in the form of a C++ comment
    #
    #   s      - one line of Python source, indentation already removed
    #   keys   - compiled rule patterns (objects from regex.compile)
    #   values - replacement templates, parallel to keys; "^N" inserts
    #            group N of the match, "^c" inserts the global classname
    #   exe    - parallel list of Python statements (or None) that are
    #            exec'ed when the rule matches; they see the list "sub"
    #
    # The whole rule list is re-scanned until no rule matches any more.
    # Raises ValueError if a template refers to a group the pattern
    # did not capture.
    global classname    # dirk heise
    changed = 1
    history = ""
    # history builds up a string of transformation numbers so later we can
    # see what trafos have been applied
    while changed:
        changed = 0
        for i in range(0, len(keys)):
            if keys[i].match(s) >= 0:
                # found a match ... apply translation
                history = history + str(i) + " "
                # make sure history string entries are separated by spaces
                # to facilitate parsing these comments later (if someone wants
                # to)
                s = values[i]
                # we've got a response... stuff in adjusted text where indicated
                pos = string.find(s, '^')
                while pos > -1:
                    # dirk heise : special : ^c means "classname"
                    # TODO: this is a nonsystematic hasty improvement hack
                    # (see the annotation in the rules section, seek TODO)
                    left = s[:pos]
                    right = s[pos+2:]
                    if s[pos+1] == 'c':
                        # insert "classname" into our string
                        k = classname
                    else:
                        num = string.atoi(s[pos+1:pos+2])
                        k = keys[i].group(num)
                    if k == None:
                        # give advice:
                        raise ValueError(
                            "Error in rule: missing a \\(.*\\) pattern in regex!")
                    s = left + k + right
                    # find another caret, but only behind the text just
                    # inserted: a '^' that came from the translated source
                    # line (e.g. "a ^ b") is not a placeholder and must
                    # not be expanded again.
                    pos = string.find(s, '^', len(left) + len(k))
                # dirk heise : execute user statement if one is given:
                if exe[i] != None:
                    # before execution, setup "environment" strings:
                    # sub[0] is a dummy so that sub[N] is group N
                    sub = [""]
                    num = 1
                    while num != 0:
                        k = keys[i].group(num)
                        if k == None:
                            num = 0     # to quit the loop
                        else:
                            num = num + 1
                            sub.append(k)
                    # sub is now a list of strings containing parsed subexpressions
                    # The statement comes from the rule table in this file,
                    # not from the translated input.
                    exec(exe[i])
                changed = 1     # check for more matches!
    # special case: add semicolon after most statements
    # pos is where a trailing "//" comment starts (or len(s) if none);
    # endpos indexes the last non-blank character before it.
    pos = string.find(s + "//", "//")
    endpos = len(string.rstrip(s[:pos])) - 1
    if s != "":
        # dirk heise: to allow rules that return an empty string
        endchar = s[endpos]
        if endpos >= 3 and s[endpos-3:endpos+1] == 'else' and \
           (endpos == 3 or s[endpos-4] in " \t"):
            # found dangling keyword -- no semicolon needed
            return (s, " //$$$ trafos applied: " + history)
        if endpos > 0 and endchar not in "{});":
            s = s[:endpos+1] + ';' + s[endpos+1:]
    return (s, " //$$$ trafos applied: " + history)
    # I use "//$$$" as a marker for the history string to facilitate later
    # automatic wipeaway of these comments


# dirk heise: added parameter exe :
def processLine(s, keys, values, exe):
    # Translate one raw source line, emitting "{" / "}" lines (via print)
    # whenever its indentation differs from the current level.
    # keys, values and exe are handed through to translate().
    # Returns (translated line, history comment), both prefixed with the
    # current indentation.
    #
    # NOTE(review): a blank line has indentation '' and therefore closes
    # every open block; this looks unintended but is left unchanged here.

    # find the indentation
    global gIndents
    # with the old regex module, match() returns the length of the match
    qtywhitechars = regex.match("[\t ]*", s)
    if qtywhitechars > -1:
        whitechars = s[:qtywhitechars]
    else:
        whitechars = ''
    if len(whitechars) > len(gIndents[-1]):
        # deeper than before: open a block
        print(gIndents[-1] + "{")
        gIndents.append(whitechars)
    else:
        # same or shallower: close blocks until the levels agree
        while gIndents and gIndents[-1] != whitechars:
            del gIndents[-1]
            if gIndents:
                print(gIndents[-1] + "}")
        # if not gIndents: raise "Inconsistent indentation"
        # dirk heise: Come on... Never give up!
        if not gIndents:
            print("WARNING! Inconsistent indentation.")
            gIndents.append(whitechars)
    # dirk heise: added exe , take care for history return value:
    s, history = translate(s[qtywhitechars:], keys, values, exe)
    return gIndents[-1] + s, gIndents[-1] + " " + history


# set up gKeys and gValues
# dirk heise: and gExe
gKeys = map(lambda x: regex.compile(x[0]), trans)
gValues = map(lambda x: x[1], trans)
gExe = map(lambda x: x[2], trans)
# not a raw string: the \t inside the brackets must stay a real tab
gEndWhite = regex.compile("\\(.*\\)\\([ \t]*\\)$")
gIndents = ['']     # stack of indentation strings of the open blocks
s = ""

# Dirk Heise 12.01.97 : commented this away
# print "Enter python code below, 'quit' when done."
# while s != 'quit':
#     s = raw_input()
#     print processLine(s, gKeys, gValues,gExe)

# Dirk Heise 12.01.97 : a very simple file interface.
# Modified by JJS to be platform-independent.
# Ask for a Python source file, translate it line by line to stdout and
# finally print the class header template collected in "header".
s = raw_input("Enter pathname of .py file:")
try:
    f = open(s)
except IOError:
    # The old handler called an undefined "result" object here, which
    # turned a missing file into a NameError.
    print("File not found!")
else:
    try:
        lines = f.readlines()
    finally:
        f.close()
    for s in lines:
        # wmod.DEBUGWRITE("PROCESSING <"+s+">")
        cs, history = processLine(s, gKeys, gValues, gExe)
        print(cs)
        if DEVELOPING:
            # print numbers of transformations applied
            print(history)
    # close the blocks that are still open at the end of the input, so
    # a file whose last line is indented still gets balanced braces
    while len(gIndents) > 1:
        del gIndents[-1]
        print(gIndents[-1] + "}")
    # now output class header if there is one:
    for s in header:
        print(s)
#
#