diff options
Diffstat (limited to 'venv/Lib/site-packages/pylint/checkers/similar.py')
-rw-r--r-- | venv/Lib/site-packages/pylint/checkers/similar.py | 452 |
1 file changed, 452 insertions, 0 deletions
# Copyright (c) 2006, 2008-2014 LOGILAB S.A. (Paris, FRANCE) <contact@logilab.fr>
# Copyright (c) 2012 Ry4an Brase <ry4an-hg@ry4an.org>
# Copyright (c) 2012 Google, Inc.
# Copyright (c) 2012 Anthony VEREZ <anthony.verez.external@cassidian.com>
# Copyright (c) 2014-2018 Claudiu Popa <pcmanticore@gmail.com>
# Copyright (c) 2014 Brett Cannon <brett@python.org>
# Copyright (c) 2014 Arun Persaud <arun@nubati.net>
# Copyright (c) 2015 Ionel Cristian Maries <contact@ionelmc.ro>
# Copyright (c) 2017 Anthony Sottile <asottile@umich.edu>
# Copyright (c) 2017 Mikhail Fesenko <proggga@gmail.com>
# Copyright (c) 2018 ssolanki <sushobhitsolanki@gmail.com>

# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/master/COPYING

# pylint: disable=redefined-builtin
"""a similarities / code duplication command line tool and pylint checker
"""

import sys
from collections import defaultdict
from getopt import getopt
from itertools import groupby

import astroid

from pylint.checkers import BaseChecker, table_lines_from_stats
from pylint.interfaces import IRawChecker
from pylint.reporters.ureports.nodes import Table
from pylint.utils import decoding_stream


class Similar:
    """finds copy-pasted lines of code in a project"""

    def __init__(
        self,
        min_lines=4,
        ignore_comments=False,
        ignore_docstrings=False,
        ignore_imports=False,
    ):
        # A run of strictly more than `min_lines` matching non-blank lines
        # is reported as a duplication (see _find_common).
        self.min_lines = min_lines
        self.ignore_comments = ignore_comments
        self.ignore_docstrings = ignore_docstrings
        self.ignore_imports = ignore_imports
        # One LineSet per appended stream.
        self.linesets = []

    def append_stream(self, streamid, stream, encoding=None):
        """append a file to search for similarities

        :param streamid: name used to identify the stream in reports
        :param stream: file-like object with a ``readlines`` method
        :param encoding: optional encoding used to decode the stream
        """
        if encoding is None:
            readlines = stream.readlines
        else:
            readlines = decoding_stream(stream, encoding).readlines
        try:
            self.linesets.append(
                LineSet(
                    streamid,
                    readlines(),
                    self.ignore_comments,
                    self.ignore_docstrings,
                    self.ignore_imports,
                )
            )
        except UnicodeDecodeError:
            # best effort: a stream that cannot be decoded is simply skipped
            pass

    def run(self):
        """start looking for similarities and display results on stdout"""
        self._display_sims(self._compute_sims())

    def _compute_sims(self):
        """compute similarities in appended files

        :returns: a list of (line_count, set of (lineset, start_index))
                  sorted by decreasing line_count
        """
        no_duplicates = defaultdict(list)
        for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():
            duplicate = no_duplicates[num]
            # merge overlapping couples into a single ensemble so that a
            # block duplicated in N files is reported once, not N*(N-1)/2 times
            for couples in duplicate:
                if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
                    couples.add((lineset1, idx1))
                    couples.add((lineset2, idx2))
                    break
            else:
                duplicate.append({(lineset1, idx1), (lineset2, idx2)})
        sims = []
        for num, ensembles in no_duplicates.items():
            for couples in ensembles:
                sims.append((num, couples))
        sims.sort()
        sims.reverse()
        return sims

    def _display_sims(self, sims):
        """display computed similarities on stdout"""
        nb_lignes_dupliquees = 0
        for num, couples in sims:
            print()
            print(num, "similar lines in", len(couples), "files")
            couples = sorted(couples)
            lineset = idx = None
            for lineset, idx in couples:
                print("==%s:%s" % (lineset.name, idx))
            if lineset:
                # show the duplicated block once, using the original
                # (unstripped) lines of the last lineset listed
                for line in lineset._real_lines[idx : idx + num]:
                    print("  ", line.rstrip())
            nb_lignes_dupliquees += num * (len(couples) - 1)
        nb_total_lignes = sum(len(lineset) for lineset in self.linesets)
        # guard against ZeroDivisionError when no line was appended,
        # mirroring the `total and ...` guard in SimilarChecker.close()
        percent = (
            nb_lignes_dupliquees * 100.0 / nb_total_lignes if nb_total_lignes else 0.0
        )
        print(
            "TOTAL lines=%s duplicates=%s percent=%.2f"
            % (nb_total_lignes, nb_lignes_dupliquees, percent)
        )

    def _find_common(self, lineset1, lineset2):
        """find similarities in the two given linesets

        yields (num, lineset1, index1, lineset2, index2) tuples, one per
        run of more than self.min_lines matching non-blank stripped lines.
        """
        lines1 = lineset1.enumerate_stripped
        lines2 = lineset2.enumerate_stripped
        find = lineset2.find
        index1 = 0
        min_lines = self.min_lines
        while index1 < len(lineset1):
            skip = 1
            num = 0
            # only lines occurring in both sets can start a match
            for index2 in find(lineset1[index1]):
                non_blank = 0
                for num, ((_, line1), (_, line2)) in enumerate(
                    zip(lines1(index1), lines2(index2))
                ):
                    if line1 != line2:
                        if non_blank > min_lines:
                            yield num, lineset1, index1, lineset2, index2
                            skip = max(skip, num)
                        break
                    if line1:
                        non_blank += 1
                else:
                    # we may have reach the end
                    num += 1
                    if non_blank > min_lines:
                        yield num, lineset1, index1, lineset2, index2
                        skip = max(skip, num)
            # jump past an already-reported run to avoid re-reporting its tail
            index1 += skip

    def _iter_sims(self):
        """iterate on similarities among all files, by making a cartesian
        product
        """
        for idx, lineset in enumerate(self.linesets[:-1]):
            for lineset2 in self.linesets[idx + 1 :]:
                for sim in self._find_common(lineset, lineset2):
                    yield sim


def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
    """return lines with leading/trailing whitespace and any ignored code
    features removed

    :param lines: list of raw source lines
    :param ignore_comments: blank out trailing ``#`` comments
    :param ignore_docstrings: blank out triple-quoted docstring lines
    :param ignore_imports: blank out import statements (parses with astroid)
    """
    if ignore_imports:
        tree = astroid.parse("".join(lines))
        node_is_import_by_lineno = (
            (node.lineno, isinstance(node, (astroid.Import, astroid.ImportFrom)))
            for node in tree.body
        )
        # a physical line may carry several statements; treat it as an
        # import line only if every statement starting on it is an import
        line_begins_import = {
            lineno: all(is_import for _, is_import in node_is_import_group)
            for lineno, node_is_import_group in groupby(
                node_is_import_by_lineno, key=lambda x: x[0]
            )
        }
        current_line_is_import = False

    strippedlines = []
    docstring = None
    for lineno, line in enumerate(lines, start=1):
        line = line.strip()
        if ignore_docstrings:
            if not docstring and any(
                line.startswith(i) for i in ['"""', "'''", 'r"""', "r'''"]
            ):
                docstring = line[:3]
                line = line[3:]
            if docstring:
                if line.endswith(docstring):
                    docstring = None
                line = ""
        if ignore_imports:
            # multi-line imports keep the flag of the line they started on
            current_line_is_import = line_begins_import.get(
                lineno, current_line_is_import
            )
            if current_line_is_import:
                line = ""
        if ignore_comments:
            line = line.split("#", 1)[0].strip()
        strippedlines.append(line)
    return strippedlines
class LineSet:
    """Holds and indexes all the lines of a single source file"""

    def __init__(
        self,
        name,
        lines,
        ignore_comments=False,
        ignore_docstrings=False,
        ignore_imports=False,
    ):
        self.name = name
        # original lines are kept for reporting; the stripped variants are
        # what similarity comparison and indexing operate on
        self._real_lines = lines
        self._stripped_lines = stripped_lines(
            lines, ignore_comments, ignore_docstrings, ignore_imports
        )
        self._index = self._mk_index()

    def __str__(self):
        return "<Lineset for %s>" % self.name

    def __len__(self):
        return len(self._real_lines)

    def __getitem__(self, index):
        return self._stripped_lines[index]

    def __lt__(self, other):
        return self.name < other.name

    def __hash__(self):
        # identity hash: two linesets are distinct even for identical content
        return id(self)

    def enumerate_stripped(self, start_at=0):
        """return an iterator on stripped lines, starting from a given index
        if specified, else 0
        """
        idx = start_at
        if start_at:
            lines = self._stripped_lines[start_at:]
        else:
            lines = self._stripped_lines
        for line in lines:
            yield idx, line
            idx += 1

    def find(self, stripped_line):
        """return positions of the given stripped line in this set"""
        return self._index.get(stripped_line, ())

    def _mk_index(self):
        """create the index for this set, mapping each non-blank stripped
        line to the list of line numbers where it occurs
        """
        index = defaultdict(list)
        for line_no, line in enumerate(self._stripped_lines):
            if line:
                index[line].append(line_no)
        return index


MSGS = {
    "R0801": (
        "Similar lines in %s files\n%s",
        "duplicate-code",
        "Indicates that a set of similar lines has been detected "
        "among multiple file. This usually means that the code should "
        "be refactored to avoid this duplication.",
    )
}


def report_similarities(sect, stats, old_stats):
    """make a layout with some stats about duplication"""
    lines = ["", "now", "previous", "difference"]
    lines += table_lines_from_stats(
        stats, old_stats, ("nb_duplicated_lines", "percent_duplicated_lines")
    )
    sect.append(Table(children=lines, cols=4, rheaders=1, cheaders=1))


# wrapper to get a pylint checker from the similar class
class SimilarChecker(BaseChecker, Similar):
    """checks for similarities and duplicated code. This computation may be
    memory / CPU intensive, so you should disable it if you experiment some
    problems.
    """

    __implements__ = (IRawChecker,)
    # configuration section name
    name = "similarities"
    # messages
    msgs = MSGS
    # configuration options
    # for available dict keys/values see the optik parser 'add_option' method
    options = (
        (
            "min-similarity-lines",  # type: ignore
            {
                "default": 4,
                "type": "int",
                "metavar": "<int>",
                "help": "Minimum lines number of a similarity.",
            },
        ),
        (
            "ignore-comments",
            {
                "default": True,
                "type": "yn",
                "metavar": "<y or n>",
                "help": "Ignore comments when computing similarities.",
            },
        ),
        (
            "ignore-docstrings",
            {
                "default": True,
                "type": "yn",
                "metavar": "<y or n>",
                "help": "Ignore docstrings when computing similarities.",
            },
        ),
        (
            "ignore-imports",
            {
                "default": False,
                "type": "yn",
                "metavar": "<y or n>",
                "help": "Ignore imports when computing similarities.",
            },
        ),
    )
    # reports
    reports = (("RP0801", "Duplication", report_similarities),)  # type: ignore

    def __init__(self, linter=None):
        BaseChecker.__init__(self, linter)
        # initial values mirror the option defaults above; set_option()
        # re-synchronizes them once the configuration is actually parsed
        Similar.__init__(
            self, min_lines=4, ignore_comments=True, ignore_docstrings=True
        )
        self.stats = None

    def set_option(self, optname, value, action=None, optdict=None):
        """method called to set an option (registered in the options list)

        overridden to report options setting to Similar
        """
        BaseChecker.set_option(self, optname, value, action, optdict)
        if optname == "min-similarity-lines":
            self.min_lines = self.config.min_similarity_lines
        elif optname == "ignore-comments":
            self.ignore_comments = self.config.ignore_comments
        elif optname == "ignore-docstrings":
            self.ignore_docstrings = self.config.ignore_docstrings
        elif optname == "ignore-imports":
            self.ignore_imports = self.config.ignore_imports

    def open(self):
        """init the checkers: reset linesets and statistics information"""
        self.linesets = []
        self.stats = self.linter.add_stats(
            nb_duplicated_lines=0, percent_duplicated_lines=0
        )

    def process_module(self, node):
        """process a module

        the module's content is accessible via the stream object

        stream must implement the readlines method
        """
        with node.stream() as stream:
            self.append_stream(self.linter.current_name, stream, node.file_encoding)

    def close(self):
        """compute and display similarities on closing (i.e. end of parsing)"""
        total = sum(len(lineset) for lineset in self.linesets)
        duplicated = 0
        stats = self.stats
        for num, couples in self._compute_sims():
            msg = []
            lineset = idx = None
            for lineset, idx in couples:
                msg.append("==%s:%s" % (lineset.name, idx))
            msg.sort()

            if lineset:
                for line in lineset._real_lines[idx : idx + num]:
                    msg.append(line.rstrip())

            self.add_message("R0801", args=(len(couples), "\n".join(msg)))
            duplicated += num * (len(couples) - 1)
        stats["nb_duplicated_lines"] = duplicated
        # `total and ...` avoids a ZeroDivisionError when no line was seen
        stats["percent_duplicated_lines"] = total and duplicated * 100.0 / total


def register(linter):
    """required method to auto register this checker"""
    linter.register_checker(SimilarChecker(linter))


def usage(status=0):
    """display command line usage information and exit with `status`"""
    print("finds copy pasted blocks in a set of files")
    print()
    print(
        "Usage: symilar [-d|--duplicates min_duplicated_lines] \
[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1..."
    )
    sys.exit(status)


def Run(argv=None):
    """standalone command line access point"""
    if argv is None:
        argv = sys.argv[1:]

    # BUGFIX: "-d" takes an argument (min_duplicated_lines, see usage()),
    # so it needs a trailing ":" in the getopt short-option spec; with the
    # previous "hdi" spec, "-d 4" yielded an empty value and int("") raised.
    s_opts = "hd:i"
    l_opts = (
        "help",
        "duplicates=",
        "ignore-comments",
        "ignore-imports",
        "ignore-docstrings",
    )
    min_lines = 4
    ignore_comments = False
    ignore_docstrings = False
    ignore_imports = False
    opts, args = getopt(argv, s_opts, l_opts)
    for opt, val in opts:
        if opt in ("-d", "--duplicates"):
            min_lines = int(val)
        elif opt in ("-h", "--help"):
            usage()
        elif opt in ("-i", "--ignore-comments"):
            ignore_comments = True
        elif opt in ("--ignore-docstrings",):
            ignore_docstrings = True
        elif opt in ("--ignore-imports",):
            ignore_imports = True
    if not args:
        usage(1)
    sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
    for filename in args:
        with open(filename) as stream:
            sim.append_stream(filename, stream)
    sim.run()
    sys.exit(0)


if __name__ == "__main__":
    Run()