# Source code for bugzoo.core.patch

from copy import copy
from typing import List, Iterator

# See following for details about unified diff format:
#   https://www.artima.com/weblogs/viewpost.jsp?thread=164293
#   https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html#Detailed-Unified

class HunkLine(object):
    """
    Base class for a single line within the body of a hunk.

    Each concrete kind of line sets `_PREFIX` to the one-character marker
    that precedes its contents in a unified format diff. Storing the
    contents and rendering the line are implemented once, here, rather than
    being repeated in every subclass.
    """
    # marker that precedes the contents of the line when it is rendered;
    # overridden by each subclass.
    _PREFIX = ''

    def __init__(self, line: str = '') -> None:
        """
        Constructs a new hunk line.

        Params:
            line:   The contents of the line (with any trailing line endings
                    removed). Defaults to the empty string so that the base
                    class can still be constructed without arguments.
        """
        self.__line = line

    def __str__(self) -> str:
        """
        Returns this line as it is written in a unified format diff: the
        marker for its kind, immediately followed by its contents.
        """
        return "{}{}".format(self._PREFIX, self.__line)

    def __repr__(self) -> str:
        """
        Returns a debugging representation that names the kind of line and
        shows its contents.
        """
        return "{}({!r})".format(type(self).__name__, self.__line)


class InsertedLine(HunkLine):
    """
    A line that was inserted. It is rendered with a leading '+'.

    Params:
        line:   The contents of the line that was inserted (with any
                trailing line endings removed).
    """
    _PREFIX = '+'


class DeletedLine(HunkLine):
    """
    A line that was removed. It is rendered with a leading '-'.

    Params:
        line:   The contents of the line that was removed (with any
                trailing line endings removed).
    """
    _PREFIX = '-'


class ContextLine(HunkLine):
    """
    An unchanged line that gives context to the surrounding changes. It is
    rendered with a leading space.

    Params:
        line:   The contents of the line.
    """
    _PREFIX = ' '


class Hunk(object):
    """
    Represents a single hunk of a unified format diff: the line at which it
    starts in the old and new versions of the file, together with its
    inserted, deleted and context lines.
    """
    @classmethod
    def _read_next(cls, lines: List[str]) -> 'Hunk':
        """
        Destructively extracts the next hunk from the front of a line buffer.

        The header and every line that belongs to the hunk are removed from
        the buffer; the first line that does not belong to the hunk (if any)
        is left at the front of the buffer.

        Params:
            lines:  The line buffer. Its first element must be the header
                    of the hunk (i.e., a line starting with '@@ -').

        Returns:
            The hunk described by the consumed lines.

        Raises:
            AssertionError: if the first line does not start with '@@ -'.
            ValueError: if the header has no closing ' @@', or if one of
                its line numbers or lengths is not an integer.
        """
        header = lines[0]
        assert header.startswith('@@ -')

        # sometimes the first line can occur on the same line as the header.
        # in that case, we inject a new line into the buffer
        # NOTE(review): GNU diff (-p) and git place a section heading, such
        #   as the enclosing function, after the closing '@@'; such a heading
        #   would be read as a context line here -- confirm which inputs
        #   this is meant to handle.
        end_header_at = header.index(' @@')
        bonus_line = header[end_header_at+3:]
        if bonus_line != "":
            lines.insert(1, bonus_line)

        # each range is given as 'start,length'; the length may be omitted,
        # in which case the range is a single line long.
        old_range, _, new_range = header[4:end_header_at].partition(' +')
        old_fields = old_range.split(',')
        new_fields = new_range.split(',')
        old_start_at = int(old_fields[0])
        new_start_at = int(new_fields[0])
        old_length = int(old_fields[1]) if len(old_fields) > 1 else 1
        new_length = int(new_fields[1]) if len(new_fields) > 1 else 1

        hunk_lines = []  # type: List[HunkLine]
        num_old = 0  # lines read so far that belong to the old file
        num_new = 0  # lines read so far that belong to the new file

        # the buffer is scanned by index and trimmed once at the end, rather
        # than popping its head for every line (which shifts the whole list
        # each time).
        cursor = 1
        while cursor < len(lines):
            line = lines[cursor]

            # start of the next file patch: once the hunk holds as many lines
            # as its header announced, a '--- ' line that is directly
            # followed by a '+++ ' line is a file header and not a deletion
            # followed by an insertion.
            if (num_old >= old_length and num_new >= new_length
                    and line.startswith('--- ')
                    and cursor + 1 < len(lines)
                    and lines[cursor + 1].startswith('+++ ')):
                break

            # inserted line
            if line.startswith('+'):
                hunk_lines.append(InsertedLine(line[1:]))
                num_new += 1

            # deleted line
            elif line.startswith('-'):
                hunk_lines.append(DeletedLine(line[1:]))
                num_old += 1

            # context line
            elif line.startswith(' '):
                hunk_lines.append(ContextLine(line[1:]))
                num_old += 1
                num_new += 1

            # end of hunk
            else:
                break

            cursor += 1

        # discard the header and every line that was consumed by the hunk
        del lines[:cursor]

        return Hunk(old_start_at, new_start_at, hunk_lines)

    def __init__(self,
                 old_start_at: int,
                 new_start_at: int,
                 lines: List[HunkLine]
                 ) -> None:
        """
        Constructs a new hunk.

        Params:
            old_start_at:   The number of the line at which the hunk starts
                            in the old version of the file.
            new_start_at:   The number of the line at which the hunk starts
                            in the new version of the file.
            lines:          The lines of the hunk, in order.
        """
        self.__old_start_at = old_start_at
        self.__new_start_at = new_start_at
        self.__lines = lines

    def __str__(self) -> str:
        """
        Returns the contents of this hunk as part of a unified format diff.
        """
        num_deleted = \
            sum(1 for l in self.__lines if isinstance(l, DeletedLine))
        num_inserted = \
            sum(1 for l in self.__lines if isinstance(l, InsertedLine))
        num_context = \
            sum(1 for l in self.__lines if isinstance(l, ContextLine))

        # context lines appear in both versions of the file, whereas deleted
        # and inserted lines appear in the old and new version only.
        num_old_lines = num_context + num_deleted
        num_new_lines = num_context + num_inserted

        header = '@@ -{},{} +{},{} @@'.format(self.__old_start_at,
                                              num_old_lines,
                                              self.__new_start_at,
                                              num_new_lines)
        body = [str(line) for line in self.__lines]
        return '\n'.join([header] + body)


class FilePatch(object):
    """
    Represents a set of changes to a single text-based file.
    """
    @classmethod
    def _read_next(cls, lines: List[str]) -> 'FilePatch':
        """
        Destructively extracts the next file patch from the line buffer.

        Every line up to and including the last hunk of the file patch is
        removed from the buffer.

        Params:
            lines:  The line buffer. Any lines that precede the first line
                    starting with '---' are discarded.

        Returns:
            The file patch described by the consumed lines.

        Raises:
            Exception: if the buffer contains no line starting with '---',
                or if that line is not immediately followed by a line
                starting with '+++'.
        """
        # find the first line starting with '---' in a single pass, rather
        # than popping the head of the buffer once per skipped line.
        header_at = -1
        for index, line in enumerate(lines):
            if line.startswith('---'):
                header_at = index
                break

        if header_at < 0:
            # every line was examined and discarded
            del lines[:]
            raise Exception("illegal file patch format: "
                            "couldn't find line starting with '---'")
        del lines[:header_at]

        # raise explicitly, rather than assert, so that the check is also
        # performed when assertions are disabled, and so that a buffer that
        # ends on the '---' line is reported as a format error.
        if len(lines) < 2 or not lines[1].startswith('+++'):
            raise Exception("illegal file patch format: "
                            "couldn't find line starting with '+++' "
                            "after line starting with '---'")

        # the file names follow the four-character '--- ' and '+++ ' markers
        old_fn = lines[0][4:].strip()
        new_fn = lines[1][4:].strip()
        del lines[:2]

        # each hunk removes its own lines from the buffer
        hunks = []  # type: List[Hunk]
        while lines and lines[0].startswith('@@'):
            hunks.append(Hunk._read_next(lines))

        return FilePatch(old_fn, new_fn, hunks)

    def __init__(self,
                 old_fn: str,
                 new_fn: str,
                 hunks: List[Hunk]
                 ) -> None:
        """
        Constructs a new file patch.

        Params:
            old_fn: The name given for the old version of the file.
            new_fn: The name given for the new version of the file.
            hunks:  The hunks that make up the changes to the file.
        """
        self.__old_fn = old_fn
        self.__new_fn = new_fn
        self.__hunks = hunks

    @property
    def old_fn(self) -> str:
        """
        The name given for the old version of the file.
        """
        return self.__old_fn

    @property
    def new_fn(self) -> str:
        """
        The name given for the new version of the file.
        """
        return self.__new_fn

    def __str__(self) -> str:
        """
        Returns a string encoding of this file patch in the unified diff
        format.
        """
        old_fn_line = '--- {}'.format(self.__old_fn)
        new_fn_line = '+++ {}'.format(self.__new_fn)
        lines = [old_fn_line, new_fn_line] + [str(h) for h in self.__hunks]
        return '\n'.join(lines)


class Patch(object):
    """
    Represents a set of changes to one-or-more text-based files.
    """
    @classmethod
    def from_unidiff(cls, diff: str) -> 'Patch':
        """
        Constructs a Patch from a provided unified format diff.

        Params:
            diff:   The text of the diff. Lines that are empty or that
                    consist solely of whitespace are skipped between file
                    patches.

        Returns:
            A patch composed of the file patches that were read, in order.
        """
        lines = diff.split('\n')
        file_patches = []
        while lines:
            # skip over blank lines between file patches
            if lines[0] == '' or lines[0].isspace():
                lines.pop(0)
                continue
            # each file patch removes its own lines from the buffer
            file_patches.append(FilePatch._read_next(lines))

        return Patch(file_patches)

    def __init__(self, file_patches: List[FilePatch]) -> None:
        """
        Constructs a new patch.

        Params:
            file_patches:   The changes to each file. The list is copied.
        """
        self.__file_patches = file_patches[:]

    @property
    def files(self) -> List[str]:
        """
        Returns a list of the names of the files that are changed by this
        patch.
        """
        # the name given for the old version of each file is reported
        return [fp.old_fn for fp in self.__file_patches]

    def __str__(self) -> str:
        """
        Returns the contents of this patch as a unified format diff.
        """
        file_patches = [str(p) for p in self.__file_patches]
        # the trailing empty string terminates the diff with a newline
        return '\n'.join(file_patches + [''])