diff options
Diffstat (limited to 'lib/python2.7/idlelib/HyperParser.py')
-rw-r--r-- | lib/python2.7/idlelib/HyperParser.py | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/lib/python2.7/idlelib/HyperParser.py b/lib/python2.7/idlelib/HyperParser.py new file mode 100644 index 0000000..5816d00 --- /dev/null +++ b/lib/python2.7/idlelib/HyperParser.py @@ -0,0 +1,255 @@ +"""Provide advanced parsing abilities for ParenMatch and other extensions. + +HyperParser uses PyParser. PyParser mostly gives information on the +proper indentation of code. HyperParser gives additional information on +the structure of code. +""" + +import string +import keyword +from idlelib import PyParse + +class HyperParser: + + def __init__(self, editwin, index): + "To initialize, analyze the surroundings of the given index." + + self.editwin = editwin + self.text = text = editwin.text + + parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth) + + def index2line(index): + return int(float(index)) + lno = index2line(text.index(index)) + + if not editwin.context_use_ps1: + for context in editwin.num_context_lines: + startat = max(lno - context, 1) + startatindex = repr(startat) + ".0" + stopatindex = "%d.end" % lno + # We add the newline because PyParse requires a newline + # at end. We add a space so that index won't be at end + # of line, so that its status will be the same as the + # char before it, if should. + parser.set_str(text.get(startatindex, stopatindex)+' \n') + bod = parser.find_good_parse_start( + editwin._build_char_in_string_func(startatindex)) + if bod is not None or startat == 1: + break + parser.set_lo(bod or 0) + else: + r = text.tag_prevrange("console", index) + if r: + startatindex = r[1] + else: + startatindex = "1.0" + stopatindex = "%d.end" % lno + # We add the newline because PyParse requires it. We add a + # space so that index won't be at end of line, so that its + # status will be the same as the char before it, if should. + parser.set_str(text.get(startatindex, stopatindex)+' \n') + parser.set_lo(0) + + # We want what the parser has, minus the last newline and space. + self.rawtext = parser.str[:-2] + # Parser.str apparently preserves the statement we are in, so + # that stopatindex can be used to synchronize the string with + # the text box indices. + self.stopatindex = stopatindex + self.bracketing = parser.get_last_stmt_bracketing() + # find which pairs of bracketing are openers. These always + # correspond to a character of rawtext. + self.isopener = [i>0 and self.bracketing[i][1] > + self.bracketing[i-1][1] + for i in range(len(self.bracketing))] + + self.set_index(index) + + def set_index(self, index): + """Set the index to which the functions relate. + + The index must be in the same statement. + """ + indexinrawtext = (len(self.rawtext) - + len(self.text.get(index, self.stopatindex))) + if indexinrawtext < 0: + raise ValueError("Index %s precedes the analyzed statement" + % index) + self.indexinrawtext = indexinrawtext + # find the rightmost bracket to which index belongs + self.indexbracket = 0 + while (self.indexbracket < len(self.bracketing)-1 and + self.bracketing[self.indexbracket+1][0] < self.indexinrawtext): + self.indexbracket += 1 + if (self.indexbracket < len(self.bracketing)-1 and + self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and + not self.isopener[self.indexbracket+1]): + self.indexbracket += 1 + + def is_in_string(self): + """Is the index given to the HyperParser in a string?""" + # The bracket to which we belong should be an opener. + # If it's an opener, it has to have a character. + return (self.isopener[self.indexbracket] and + self.rawtext[self.bracketing[self.indexbracket][0]] + in ('"', "'")) + + def is_in_code(self): + """Is the index given to the HyperParser in normal code?""" + return (not self.isopener[self.indexbracket] or + self.rawtext[self.bracketing[self.indexbracket][0]] + not in ('#', '"', "'")) + + def get_surrounding_brackets(self, openers='([{', mustclose=False): + """Return bracket indexes or None. + + If the index given to the HyperParser is surrounded by a + bracket defined in openers (or at least has one before it), + return the indices of the opening bracket and the closing + bracket (or the end of line, whichever comes first). + + If it is not surrounded by brackets, or the end of line comes + before the closing bracket and mustclose is True, returns None. + """ + + bracketinglevel = self.bracketing[self.indexbracket][1] + before = self.indexbracket + while (not self.isopener[before] or + self.rawtext[self.bracketing[before][0]] not in openers or + self.bracketing[before][1] > bracketinglevel): + before -= 1 + if before < 0: + return None + bracketinglevel = min(bracketinglevel, self.bracketing[before][1]) + after = self.indexbracket + 1 + while (after < len(self.bracketing) and + self.bracketing[after][1] >= bracketinglevel): + after += 1 + + beforeindex = self.text.index("%s-%dc" % + (self.stopatindex, len(self.rawtext)-self.bracketing[before][0])) + if (after >= len(self.bracketing) or + self.bracketing[after][0] > len(self.rawtext)): + if mustclose: + return None + afterindex = self.stopatindex + else: + # We are after a real char, so it is a ')' and we give the + # index before it. + afterindex = self.text.index( + "%s-%dc" % (self.stopatindex, + len(self.rawtext)-(self.bracketing[after][0]-1))) + + return beforeindex, afterindex + + # Ascii chars that may be in a white space + _whitespace_chars = " \t\n\\" + # Ascii chars that may be in an identifier + _id_chars = string.ascii_letters + string.digits + "_" + # Ascii chars that may be the first char of an identifier + _id_first_chars = string.ascii_letters + "_" + + # Given a string and pos, return the number of chars in the + # identifier which ends at pos, or 0 if there is no such one. Saved + # words are not identifiers. + def _eat_identifier(self, str, limit, pos): + i = pos + while i > limit and str[i-1] in self._id_chars: + i -= 1 + if (i < pos and (str[i] not in self._id_first_chars or + keyword.iskeyword(str[i:pos]))): + i = pos + return pos - i + + def get_expression(self): + """Return a string with the Python expression which ends at the + given index, which is empty if there is no real one. + """ + if not self.is_in_code(): + raise ValueError("get_expression should only be called" + "if index is inside a code.") + + rawtext = self.rawtext + bracketing = self.bracketing + + brck_index = self.indexbracket + brck_limit = bracketing[brck_index][0] + pos = self.indexinrawtext + + last_identifier_pos = pos + postdot_phase = True + + while 1: + # Eat whitespaces, comments, and if postdot_phase is False - a dot + while 1: + if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars: + # Eat a whitespace + pos -= 1 + elif (not postdot_phase and + pos > brck_limit and rawtext[pos-1] == '.'): + # Eat a dot + pos -= 1 + postdot_phase = True + # The next line will fail if we are *inside* a comment, + # but we shouldn't be. + elif (pos == brck_limit and brck_index > 0 and + rawtext[bracketing[brck_index-1][0]] == '#'): + # Eat a comment + brck_index -= 2 + brck_limit = bracketing[brck_index][0] + pos = bracketing[brck_index+1][0] + else: + # If we didn't eat anything, quit. + break + + if not postdot_phase: + # We didn't find a dot, so the expression end at the + # last identifier pos. + break + + ret = self._eat_identifier(rawtext, brck_limit, pos) + if ret: + # There is an identifier to eat + pos = pos - ret + last_identifier_pos = pos + # Now, to continue the search, we must find a dot. + postdot_phase = False + # (the loop continues now) + + elif pos == brck_limit: + # We are at a bracketing limit. If it is a closing + # bracket, eat the bracket, otherwise, stop the search. + level = bracketing[brck_index][1] + while brck_index > 0 and bracketing[brck_index-1][1] > level: + brck_index -= 1 + if bracketing[brck_index][0] == brck_limit: + # We were not at the end of a closing bracket + break + pos = bracketing[brck_index][0] + brck_index -= 1 + brck_limit = bracketing[brck_index][0] + last_identifier_pos = pos + if rawtext[pos] in "([": + # [] and () may be used after an identifier, so we + # continue. postdot_phase is True, so we don't allow a dot. + pass + else: + # We can't continue after other types of brackets + if rawtext[pos] in "'\"": + # Scan a string prefix + while pos > 0 and rawtext[pos - 1] in "rRbBuU": + pos -= 1 + last_identifier_pos = pos + break + + else: + # We've found an operator or something. + break + + return rawtext[last_identifier_pos:self.indexinrawtext] + + +if __name__ == '__main__': + import unittest + unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2) |