Source code for codegrade.models.plagiarism_match

"""The module that defines the ``PlagiarismMatch`` model.

SPDX-License-Identifier: AGPL-3.0-only OR BSD-3-Clause-Clear
"""

from __future__ import annotations

import typing as t
from dataclasses import dataclass, field

import cg_request_args as rqa

from .. import parsers
from ..utils import to_dict
from .base_directory import BaseDirectory
from .base_file import BaseFile


@dataclass
class PlagiarismMatch:
    """A single plagiarism match: a hunk of code that looks similar in two files.

    Instances can be built directly through the dataclass constructor or from
    a plain dictionary with :meth:`from_dict`; :meth:`to_dict` converts an
    instance back to a dictionary with the keys ``id``, ``files`` and
    ``lines``.
    """

    #: The id of this match.
    id: int
    #: The two files that construct this match. This list always has a length
    #: of 2.
    files: t.Sequence[t.Union[BaseFile, BaseDirectory]]
    #: The lines that match in each file, each item in this list (which always
    #: has a length of 2) is the start,end tuple.
    lines: t.Sequence[t.Sequence[int]]

    #: The dictionary this object was parsed from. It is only filled in by
    #: ``from_dict``; it defaults to ``None`` so that instances created through
    #: the constructor can still be inspected and compared without raising an
    #: ``AttributeError``.
    raw_data: t.Optional[t.Dict[str, t.Any]] = field(
        init=False, repr=False, default=None
    )

    #: Parser used by ``from_dict``. The mapping is created inside a lambda
    #: handed to ``rqa.Lazy``, so it is not constructed while the class body
    #: is being executed.
    data_parser: t.ClassVar = rqa.Lazy(
        lambda: rqa.FixedMapping(
            rqa.RequiredArgument(
                "id",
                rqa.SimpleValue.int,
                doc="The id of this match.",
            ),
            rqa.RequiredArgument(
                "files",
                rqa.List(
                    parsers.make_union(
                        parsers.ParserFor.make(BaseFile),
                        parsers.ParserFor.make(BaseDirectory),
                    )
                ),
                doc="The two files that construct this match. This list always has a length of 2.",
            ),
            rqa.RequiredArgument(
                "lines",
                rqa.List(rqa.List(parsers.make_union(rqa.SimpleValue.int))),
                doc="The lines that match in each file, item in this list (which always has a length of 2) is the start,end tuple.",
            ),
        ).use_readable_describe(True)
    )

    def to_dict(self) -> t.Dict[str, t.Any]:
        """Convert this match to a dictionary.

        Each attribute is passed through the package level ``to_dict`` helper.
        ``raw_data`` is not included in the result.

        :returns: A dictionary with the keys ``id``, ``files`` and ``lines``.
        """
        return {
            "id": to_dict(self.id),
            "files": to_dict(self.files),
            "lines": to_dict(self.lines),
        }

    @classmethod
    def from_dict(
        cls: t.Type[PlagiarismMatch], d: t.Dict[str, t.Any]
    ) -> PlagiarismMatch:
        """Create a ``PlagiarismMatch`` from a dictionary.

        The dictionary is parsed with ``data_parser``; any error raised by
        ``try_parse`` propagates to the caller unchanged.

        :param d: The dictionary to parse, it should contain the keys ``id``,
            ``files`` and ``lines``.
        :returns: The parsed match, with ``raw_data`` set to ``d`` itself (no
            copy is made).
        """
        parsed = cls.data_parser.try_parse(d)

        res = cls(
            id=parsed.id,
            files=parsed.files,
            lines=parsed.lines,
        )
        # Keep a reference to the unparsed input on the instance.
        res.raw_data = d
        return res