summaryrefslogtreecommitdiff
path: root/venv/Lib/site-packages/pylint/checkers/similar.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/Lib/site-packages/pylint/checkers/similar.py')
-rw-r--r--venv/Lib/site-packages/pylint/checkers/similar.py452
1 file changed, 452 insertions, 0 deletions
diff --git a/venv/Lib/site-packages/pylint/checkers/similar.py b/venv/Lib/site-packages/pylint/checkers/similar.py
new file mode 100644
index 0000000..019b55f
--- /dev/null
+++ b/venv/Lib/site-packages/pylint/checkers/similar.py
@@ -0,0 +1,452 @@
+# Copyright (c) 2006, 2008-2014 LOGILAB S.A. (Paris, FRANCE) <contact@logilab.fr>
+# Copyright (c) 2012 Ry4an Brase <ry4an-hg@ry4an.org>
+# Copyright (c) 2012 Google, Inc.
+# Copyright (c) 2012 Anthony VEREZ <anthony.verez.external@cassidian.com>
+# Copyright (c) 2014-2018 Claudiu Popa <pcmanticore@gmail.com>
+# Copyright (c) 2014 Brett Cannon <brett@python.org>
+# Copyright (c) 2014 Arun Persaud <arun@nubati.net>
+# Copyright (c) 2015 Ionel Cristian Maries <contact@ionelmc.ro>
+# Copyright (c) 2017 Anthony Sottile <asottile@umich.edu>
+# Copyright (c) 2017 Mikhail Fesenko <proggga@gmail.com>
+# Copyright (c) 2018 ssolanki <sushobhitsolanki@gmail.com>
+
+# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
+# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
+
+# pylint: disable=redefined-builtin
+"""a similarities / code duplication command line tool and pylint checker
+"""
+
+import sys
+from collections import defaultdict
+from getopt import getopt
+from itertools import groupby
+
+import astroid
+
+from pylint.checkers import BaseChecker, table_lines_from_stats
+from pylint.interfaces import IRawChecker
+from pylint.reporters.ureports.nodes import Table
+from pylint.utils import decoding_stream
+
+
class Similar:
    """finds copy-pasted lines of code in a project"""

    def __init__(
        self,
        min_lines=4,
        ignore_comments=False,
        ignore_docstrings=False,
        ignore_imports=False,
    ):
        # threshold: chunks are reported when more than min_lines non-blank
        # lines match (see the `non_blank > min_lines` tests in _find_common)
        self.min_lines = min_lines
        self.ignore_comments = ignore_comments
        self.ignore_docstrings = ignore_docstrings
        self.ignore_imports = ignore_imports
        # one LineSet per successfully appended stream
        self.linesets = []

    def append_stream(self, streamid, stream, encoding=None):
        """append a file to search for similarities

        :param streamid: name used when reporting this stream (e.g. file name)
        :param stream: object exposing a readlines() method
        :param encoding: if given, the stream is wrapped with decoding_stream
            and decoded with this encoding
        """
        if encoding is None:
            readlines = stream.readlines
        else:
            readlines = decoding_stream(stream, encoding).readlines
        try:
            self.linesets.append(
                LineSet(
                    streamid,
                    readlines(),
                    self.ignore_comments,
                    self.ignore_docstrings,
                    self.ignore_imports,
                )
            )
        except UnicodeDecodeError:
            # undecodable streams are silently skipped instead of aborting
            # the whole analysis
            pass

    def run(self):
        """start looking for similarities and display results on stdout"""
        self._display_sims(self._compute_sims())

    def _compute_sims(self):
        """compute similarities in appended files

        :returns: list of (line count, set of (lineset, start index) couples)
            sorted by decreasing line count
        """
        # maps a duplicate length to groups of locations; each group (a set of
        # (lineset, start index) couples) collects every occurrence of one
        # duplicated chunk of that length
        no_duplicates = defaultdict(list)
        for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():
            duplicate = no_duplicates[num]
            for couples in duplicate:
                # merge into an existing group when either location already
                # belongs to it
                if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
                    couples.add((lineset1, idx1))
                    couples.add((lineset2, idx2))
                    break
            else:
                # neither location seen yet: start a new group
                duplicate.append({(lineset1, idx1), (lineset2, idx2)})
        sims = []
        for num, ensembles in no_duplicates.items():
            for couples in ensembles:
                sims.append((num, couples))
        sims.sort()
        sims.reverse()
        return sims

    def _display_sims(self, sims):
        """display computed similarities on stdout"""
        nb_lignes_dupliquees = 0
        for num, couples in sims:
            print()
            print(num, "similar lines in", len(couples), "files")
            couples = sorted(couples)
            lineset = idx = None
            for lineset, idx in couples:
                print("==%s:%s" % (lineset.name, idx))
            # print the duplicated chunk itself once, taken from the last
            # (lineset, idx) couple of the loop above
            if lineset:
                for line in lineset._real_lines[idx : idx + num]:
                    print(" ", line.rstrip())
            # every occurrence beyond the first one counts as duplication
            nb_lignes_dupliquees += num * (len(couples) - 1)
        nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
        print(
            "TOTAL lines=%s duplicates=%s percent=%.2f"
            % (
                nb_total_lignes,
                nb_lignes_dupliquees,
                nb_lignes_dupliquees * 100.0 / nb_total_lignes,
            )
        )

    def _find_common(self, lineset1, lineset2):
        """find similarities in the two given linesets

        yields (num, lineset1, index1, lineset2, index2) tuples, one per chunk
        of more than min_lines matching non-blank stripped lines
        """
        lines1 = lineset1.enumerate_stripped
        lines2 = lineset2.enumerate_stripped
        find = lineset2.find
        index1 = 0
        min_lines = self.min_lines
        while index1 < len(lineset1):
            skip = 1
            num = 0
            # every position in lineset2 where the current stripped line of
            # lineset1 also occurs is a candidate chunk start
            for index2 in find(lineset1[index1]):
                non_blank = 0
                for num, ((_, line1), (_, line2)) in enumerate(
                    zip(lines1(index1), lines2(index2))
                ):
                    if line1 != line2:
                        # chunk ended by a mismatch; report it if long enough
                        if non_blank > min_lines:
                            yield num, lineset1, index1, lineset2, index2
                        # advance past the matched chunk so its inner lines
                        # are not rescanned as new chunk starts
                        skip = max(skip, num)
                        break
                    if line1:
                        non_blank += 1
                else:
                    # we may have reached the end of one of the linesets
                    # without a mismatch: count the last compared line too
                    num += 1
                    if non_blank > min_lines:
                        yield num, lineset1, index1, lineset2, index2
                    skip = max(skip, num)
            index1 += skip

    def _iter_sims(self):
        """iterate on similarities among all files, by making a cartesian
        product
        """
        # compare each lineset with every lineset appended after it
        for idx, lineset in enumerate(self.linesets[:-1]):
            for lineset2 in self.linesets[idx + 1 :]:
                for sim in self._find_common(lineset, lineset2):
                    yield sim
+
+
def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
    """return lines with leading/trailing whitespace and any ignored code
    features removed

    :param lines: the source lines of one module (with their newlines)
    :param ignore_comments: strip trailing "# ..." comments
    :param ignore_docstrings: blank out lines that belong to a docstring
    :param ignore_imports: blank out import statements (the source must be
        parsable by astroid in that case)
    :returns: a list of stripped lines, same length as *lines*
    """
    if ignore_imports:
        tree = astroid.parse("".join(lines))
        node_is_import_by_lineno = (
            (node.lineno, isinstance(node, (astroid.Import, astroid.ImportFrom)))
            for node in tree.body
        )
        # a physical line is "import" only if every top-level node starting
        # on it is an import node
        line_begins_import = {
            lineno: all(is_import for _, is_import in node_is_import_group)
            for lineno, node_is_import_group in groupby(
                node_is_import_by_lineno, key=lambda x: x[0]
            )
        }
        current_line_is_import = False

    strippedlines = []
    docstring = None
    for lineno, line in enumerate(lines, start=1):
        line = line.strip()
        if ignore_docstrings:
            if not docstring:
                if line.startswith('"""') or line.startswith("'''"):
                    docstring = line[:3]
                    line = line[3:]
                elif line.startswith('r"""') or line.startswith("r'''"):
                    # bug fix: for raw docstrings the closing delimiter is the
                    # quotes only, not the "r" prefix; previously line[:3] kept
                    # 'r""' as the delimiter, which never matched the closing
                    # quotes, so everything after the docstring was blanked
                    docstring = line[1:4]
                    line = line[4:]
            if docstring:
                if line.endswith(docstring):
                    docstring = None
                # any line inside (or closing) a docstring is ignored
                line = ""
        if ignore_imports:
            # a multi-line import keeps the flag from the line where it began
            current_line_is_import = line_begins_import.get(
                lineno, current_line_is_import
            )
            if current_line_is_import:
                line = ""
        if ignore_comments:
            # strip everything after the first '#'; this is naive about '#'
            # inside string literals, as the original implementation was
            line = line.split("#", 1)[0].strip()
        strippedlines.append(line)
    return strippedlines
+
+
class LineSet:
    """Holds and indexes all the lines of a single source file.

    The raw lines are kept for reporting; a stripped/normalized copy of each
    line is what duplicate detection actually compares, and non-empty stripped
    lines are indexed for O(1) lookup of their positions.
    """

    def __init__(
        self,
        name,
        lines,
        ignore_comments=False,
        ignore_docstrings=False,
        ignore_imports=False,
    ):
        self.name = name
        # verbatim source lines, used when printing duplicated chunks
        self._real_lines = lines
        # normalized lines used for comparison
        self._stripped_lines = stripped_lines(
            lines, ignore_comments, ignore_docstrings, ignore_imports
        )
        self._index = self._mk_index()

    def __str__(self):
        return "<Lineset for %s>" % self.name

    def __len__(self):
        return len(self._real_lines)

    def __getitem__(self, index):
        return self._stripped_lines[index]

    def __lt__(self, other):
        return self.name < other.name

    def __hash__(self):
        # identity hash: two LineSets are distinct even for identical content
        return id(self)

    def enumerate_stripped(self, start_at=0):
        """yield (index, stripped line) pairs, starting at index *start_at*
        (0 by default)
        """
        tail = self._stripped_lines[start_at:] if start_at else self._stripped_lines
        for offset, line in enumerate(tail):
            yield start_at + offset, line

    def find(self, stripped_line):
        """return positions of the given stripped line in this set"""
        return self._index.get(stripped_line, ())

    def _mk_index(self):
        """build the mapping from each non-empty stripped line to the list of
        indices where it occurs
        """
        index = defaultdict(list)
        for position, content in enumerate(self._stripped_lines):
            if content:
                index[content].append(position)
        return index
+
+
# message definitions registered by SimilarChecker: R0801 (symbolic name
# "duplicate-code") is emitted once per detected group of duplicated lines,
# with the number of files and the duplicated chunk as arguments
MSGS = {
    "R0801": (
        "Similar lines in %s files\n%s",
        "duplicate-code",
        "Indicates that a set of similar lines has been detected "
        "among multiple file. This usually means that the code should "
        "be refactored to avoid this duplication.",
    )
}
+
+
def report_similarities(sect, stats, old_stats):
    """build the duplication report section: a four-column table comparing
    the current duplication statistics with the previous run's
    """
    rows = ["", "now", "previous", "difference"]
    rows += table_lines_from_stats(
        stats, old_stats, ("nb_duplicated_lines", "percent_duplicated_lines")
    )
    table = Table(children=rows, cols=4, rheaders=1, cheaders=1)
    sect.append(table)
+
+
+# wrapper to get a pylint checker from the similar class
class SimilarChecker(BaseChecker, Similar):
    """checks for similarities and duplicated code. This computation may be
    memory / CPU intensive, so you should disable it if you experiment some
    problems.
    """

    __implements__ = (IRawChecker,)
    # configuration section name
    name = "similarities"
    # messages
    msgs = MSGS
    # configuration options
    # for available dict keys/values see the optik parser 'add_option' method
    options = (
        (
            "min-similarity-lines",  # type: ignore
            {
                "default": 4,
                "type": "int",
                "metavar": "<int>",
                "help": "Minimum lines number of a similarity.",
            },
        ),
        (
            "ignore-comments",
            {
                "default": True,
                "type": "yn",
                "metavar": "<y or n>",
                "help": "Ignore comments when computing similarities.",
            },
        ),
        (
            "ignore-docstrings",
            {
                "default": True,
                "type": "yn",
                "metavar": "<y or n>",
                "help": "Ignore docstrings when computing similarities.",
            },
        ),
        (
            "ignore-imports",
            {
                "default": False,
                "type": "yn",
                "metavar": "<y or n>",
                "help": "Ignore imports when computing similarities.",
            },
        ),
    )
    # reports
    reports = (("RP0801", "Duplication", report_similarities),)  # type: ignore

    def __init__(self, linter=None):
        BaseChecker.__init__(self, linter)
        # defaults here mirror the option defaults above; set_option keeps
        # the Similar attributes in sync once options are actually parsed
        Similar.__init__(
            self, min_lines=4, ignore_comments=True, ignore_docstrings=True
        )
        self.stats = None

    def set_option(self, optname, value, action=None, optdict=None):
        """method called to set an option (registered in the options list)

        overridden to mirror similarity-related option values onto the
        corresponding Similar attributes
        """
        BaseChecker.set_option(self, optname, value, action, optdict)
        # option name -> config attribute holding the parsed value
        mirrored = {
            "min-similarity-lines": "min_similarity_lines",
            "ignore-comments": "ignore_comments",
            "ignore-docstrings": "ignore_docstrings",
            "ignore-imports": "ignore_imports",
        }
        config_attr = mirrored.get(optname)
        if config_attr is None:
            return
        # the Similar attribute is named min_lines, not min_similarity_lines
        target = "min_lines" if optname == "min-similarity-lines" else config_attr
        setattr(self, target, getattr(self.config, config_attr))

    def open(self):
        """init the checkers: reset linesets and statistics information"""
        self.linesets = []
        self.stats = self.linter.add_stats(
            nb_duplicated_lines=0, percent_duplicated_lines=0
        )

    def process_module(self, node):
        """process a module

        the module's content is accessible via the stream object

        stream must implement the readlines method
        """
        with node.stream() as stream:
            self.append_stream(self.linter.current_name, stream, node.file_encoding)

    def close(self):
        """compute and display similarities on closing (i.e. end of parsing)"""
        total = sum(len(lineset) for lineset in self.linesets)
        duplicated = 0
        stats = self.stats
        for num, couples in self._compute_sims():
            headers = []
            last_set = last_idx = None
            for last_set, last_idx in couples:
                headers.append("==%s:%s" % (last_set.name, last_idx))
            headers.sort()
            # append the duplicated chunk itself, taken from the last couple
            if last_set:
                for line in last_set._real_lines[last_idx : last_idx + num]:
                    headers.append(line.rstrip())
            self.add_message("R0801", args=(len(couples), "\n".join(headers)))
            # occurrences beyond the first count as duplicated lines
            duplicated += num * (len(couples) - 1)
        stats["nb_duplicated_lines"] = duplicated
        stats["percent_duplicated_lines"] = total and duplicated * 100.0 / total
+
+
def register(linter):
    """required method to auto register this checker """
    checker = SimilarChecker(linter)
    linter.register_checker(checker)
+
+
def usage(status=0):
    """print command line usage information, then exit with *status*"""
    print("finds copy pasted blocks in a set of files")
    print()
    # adjacent string literals concatenate to the same single-line message
    print(
        "Usage: symilar [-d|--duplicates min_duplicated_lines] "
        "[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1..."
    )
    sys.exit(status)
+
+
def Run(argv=None):
    """standalone command line access point

    parses *argv* (defaults to sys.argv[1:]), runs the Similar duplicate-code
    finder on the listed files and exits the process (status 0 on success,
    1 via usage() when no file is given)
    """
    if argv is None:
        argv = sys.argv[1:]

    # bug fix: "-d" takes a mandatory argument (the minimum number of
    # duplicated lines, as documented in usage()), so it must be declared as
    # "d:" in the getopt short-option string; with the previous "hdi" spec,
    # getopt returned an empty value for -d and int("") raised ValueError
    s_opts = "hd:i"
    l_opts = (
        "help",
        "duplicates=",
        "ignore-comments",
        "ignore-imports",
        "ignore-docstrings",
    )
    min_lines = 4
    ignore_comments = False
    ignore_docstrings = False
    ignore_imports = False
    opts, args = getopt(argv, s_opts, l_opts)
    for opt, val in opts:
        if opt in ("-d", "--duplicates"):
            min_lines = int(val)
        elif opt in ("-h", "--help"):
            usage()
        elif opt in ("-i", "--ignore-comments"):
            ignore_comments = True
        elif opt in ("--ignore-docstrings",):
            ignore_docstrings = True
        elif opt in ("--ignore-imports",):
            ignore_imports = True
    if not args:
        # no file given: show usage and exit with an error status
        usage(1)
    sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
    for filename in args:
        with open(filename) as stream:
            sim.append_stream(filename, stream)
    sim.run()
    sys.exit(0)
+
+
+if __name__ == "__main__":
+ Run()