Context extraction timestamp: 20240804_143019

<repository_structure>
<directory name="academic-claim-analyzer">
    <file>
        <name>.env</name>
        <path>.env</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>.gitignore</name>
        <path>.gitignore</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>conftest.py</name>
        <path>conftest.py</path>
        <content>
# conftest.py
# Make the repository root importable so pytest can import project modules
# without the package being installed.
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))

        </content>
    </file>
    <file>
        <name>cookies.json</name>
        <path>cookies.json</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>firecrawl_setup.md</name>
        <path>firecrawl_setup.md</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>repo_context_extractor.py</name>
        <path>repo_context_extractor.py</path>
        <content>
import os
import datetime

# Directories never descended into during the walk.
EXCLUDED_DIRS = {".git", "__pycache__", "node_modules", ".venv"}
# Extensions whose full content is embedded in the report.
FULL_CONTENT_EXTENSIONS = {".py", ".toml", ".dbml", ".yaml"}
# File names always embedded in full, regardless of extension.
ALWAYS_INCLUDE_FILES = {"requirements.txt", "LICENSE", "README.md"}

def create_file_element(file_path, root_folder):
    """Return an XML-like ``<file>`` element describing *file_path*.

    Full content is embedded only for whitelisted extensions
    (FULL_CONTENT_EXTENSIONS) or whitelisted names (ALWAYS_INCLUDE_FILES);
    every other file gets a placeholder.

    :param file_path: path of the file to describe.
    :param root_folder: repository root used to compute the relative path.
    :return: the serialized ``<file>`` element as a single string.
    """
    relative_path = os.path.relpath(file_path, root_folder)
    file_name = os.path.basename(file_path)
    file_extension = os.path.splitext(file_name)[1]

    file_element = [
        f"    <file>\n        <name>{file_name}</name>\n        <path>{relative_path}</path>\n"
    ]

    if file_extension in FULL_CONTENT_EXTENSIONS or file_name in ALWAYS_INCLUDE_FILES:
        file_element.append("        <content>\n")
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                file_element.append(file.read())
        except UnicodeDecodeError:
            file_element.append("Binary or non-UTF-8 content not displayed")
        except OSError as exc:
            # A permission error or a file deleted mid-walk must not abort
            # the whole extraction; record the failure in the report instead.
            file_element.append(f"Content unavailable ({exc.__class__.__name__})")
        file_element.append("\n        </content>\n")
    else:
        file_element.append("        <content>Full content not provided</content>\n")

    file_element.append("    </file>\n")
    return "".join(file_element)

def get_repo_structure(root_folder):
    """Walk *root_folder* and serialize it as ``<directory>``/``<file>`` elements.

    Files listed in ALWAYS_INCLUDE_FILES are collected during the walk and
    appended after all directories so they appear at the end of the report.

    :param root_folder: directory tree to scan.
    :return: the complete ``<repository_structure>`` string.
    """
    structure = ["<repository_structure>\n"]
    always_include_files = []

    for subdir, dirs, files in os.walk(root_folder):
        # Prune excluded directories in place so os.walk never descends into them.
        dirs[:] = [d for d in dirs if d not in EXCLUDED_DIRS]
        # Compute depth via relpath; str.replace(root_folder, "") would be
        # wrong if the root path string recurred inside a deeper subpath.
        relative_subdir = os.path.relpath(subdir, root_folder)
        level = 0 if relative_subdir == os.curdir else relative_subdir.count(os.sep) + 1
        indent = " " * 4 * level

        structure.append(f'{indent}<directory name="{os.path.basename(subdir)}">\n')
        for file in files:
            file_path = os.path.join(subdir, file)
            if file in ALWAYS_INCLUDE_FILES:
                # Deferred: emitted once, after the directory listing.
                always_include_files.append((file_path, root_folder))
            else:
                structure.append(create_file_element(file_path, root_folder))
        # NOTE(review): the closing tag is emitted before child directories are
        # visited, so output nesting is flattened -- matches the existing format.
        structure.append(f"{indent}</directory>\n")

    # Add always included files at the end of the structure
    for file_path, root in always_include_files:
        structure.append(create_file_element(file_path, root))

    structure.append("</repository_structure>\n")
    return "".join(structure)

def main():
    """Regenerate the repository context report for the current directory.

    Deletes any stale ``<dir>_context_*.txt`` reports, then writes a fresh
    timestamped one containing the serialized repository structure.
    """
    root_folder = os.getcwd()  # operate on the current working directory
    base_dir = os.path.basename(root_folder)
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = os.path.join(root_folder, f"{base_dir}_context_{timestamp}.txt")

    # Remove context files left over from earlier runs.
    prefix = f"{base_dir}_context_"
    for entry in os.listdir(root_folder):
        if entry.startswith(prefix) and entry.endswith(".txt"):
            os.remove(os.path.join(root_folder, entry))
            print(f"Deleted previous context file: {entry}")

    repo_structure = get_repo_structure(root_folder)

    with open(output_file, "w", encoding="utf-8") as out:
        out.write(f"Context extraction timestamp: {timestamp}\n\n")
        out.write(repo_structure)

    print(f"Fresh repository context has been extracted to {output_file}")

if __name__ == "__main__":
    main()
        </content>
    </file>
    <file>
        <name>scraper.log</name>
        <path>scraper.log</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>setup.py</name>
        <path>setup.py</path>
        <content>
# setup.py
import subprocess
import sys
from setuptools import setup, find_packages
from setuptools.command.develop import develop
from setuptools.command.install import install

class PostInstallCommand(install):
    """``pip install`` hook that also downloads Playwright's browsers."""

    def run(self):
        # Finish the standard installation first, then fetch the browsers.
        install.run(self)
        self.install_playwright_browsers()

    def install_playwright_browsers(self):
        # Run playwright through the current interpreter so the browsers
        # are installed into the active environment.
        cmd = [sys.executable, "-m", "playwright", "install"]
        subprocess.check_call(cmd)

class PostDevelopCommand(develop):
    """``pip install -e`` hook that also downloads Playwright's browsers."""

    def run(self):
        # Finish the standard develop install first, then fetch the browsers.
        develop.run(self)
        self.install_playwright_browsers()

    def install_playwright_browsers(self):
        # Run playwright through the current interpreter so the browsers
        # are installed into the active environment.
        cmd = [sys.executable, "-m", "playwright", "install"]
        subprocess.check_call(cmd)

# The PyPI long description comes straight from the README.
with open('README.md', 'r', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='academic-claim-analyzer',
    use_scm_version=True,  # version derived from git tags via setuptools_scm
    setup_requires=['setuptools_scm'],
    author='Bryan Nsoh',
    author_email='bryan.anye.5@gmail.com',
    description='A tool for analyzing academic claims',
    long_description=long_description,
    long_description_content_type='text/markdown',
    # NOTE(review): URL points at the async_llm_handler repo, not this
    # project -- confirm the intended homepage.
    url='https://github.com/BryanNsoh/async_llm_handler',
    packages=find_packages(),
    install_requires=[
        'aiohttp',
        'anthropic',
        'google-generativeai',
        'openai',
        'python-dotenv',
        'tiktoken',
        # 'asyncio' removed: asyncio is part of the standard library since
        # Python 3.4; the PyPI package of that name is an obsolete 3.3
        # backport that can shadow the stdlib module on modern interpreters.
        'beautifulsoup4',
        'PyMuPDF',
        'playwright==1.36.0',
        'fake-useragent',
        'async-llm-handler'
    ],
    extras_require={
        'dev': ['pytest', 'pytest-asyncio'],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.7',
    cmdclass={
        'develop': PostDevelopCommand,
        'install': PostInstallCommand,
    },
)
        </content>
    </file>
</directory>
    <directory name=".eggs">
    <file>
        <name>README.txt</name>
        <path>.eggs\README.txt</path>
        <content>Full content not provided</content>
    </file>
    </directory>
        <directory name="setuptools_scm-8.1.0-py3.11.egg">
        </directory>
            <directory name="EGG-INFO">
    <file>
        <name>entry_points.txt</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\EGG-INFO\entry_points.txt</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>PKG-INFO</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\EGG-INFO\PKG-INFO</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>RECORD</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\EGG-INFO\RECORD</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>requires.txt</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\EGG-INFO\requires.txt</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>top_level.txt</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\EGG-INFO\top_level.txt</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>WHEEL</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\EGG-INFO\WHEEL</path>
        <content>Full content not provided</content>
    </file>
            </directory>
            <directory name="setuptools_scm">
    <file>
        <name>.git_archival.txt</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\.git_archival.txt</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>discover.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\discover.py</path>
        <content>
from __future__ import annotations

import os

from pathlib import Path
from typing import Iterable
from typing import Iterator

from . import _entrypoints
from . import _log
from . import _types as _t
from ._config import Configuration

# Child logger dedicated to root/entry-point discovery diagnostics.
log = _log.log.getChild("discover")


def walk_potential_roots(root: _t.PathT, search_parents: bool = True) -> Iterator[Path]:
    """
    Yield *root* followed, optionally, by each of its parent directories.
    :param root: File path.
    :param search_parents: If ``False`` the parents are not considered.
    """
    current = Path(root)
    yield current
    if not search_parents:
        return
    yield from current.parents


def match_entrypoint(root: _t.PathT, name: str) -> bool:
    """
    Consider a ``root`` as entry-point.
    :param root: File path.
    :param name: Subdirectory name.
    :return: ``True`` if a subdirectory ``name`` exists in ``root`` and
        ``name`` is relative; absolute names are rejected as bad entry points.
    """
    if not os.path.exists(os.path.join(root, name)):
        return False
    if os.path.isabs(name):
        # An absolute entry-point name is malformed; skip it.
        log.debug("ignoring bad ep %s", name)
        return False
    return True


# Blocked entry points from legacy plugins that must never be loaded.
_BLOCKED_EP_TARGETS = {"setuptools_scm_git_archive:parse"}


def iter_matching_entrypoints(
    root: _t.PathT, entrypoint: str, config: Configuration
) -> Iterable[_entrypoints.EntryPoint]:
    """
    Yield entry points of group *entrypoint* whose name exists under *root*
    (or, optionally, one of its parents).
    :param root: File path.
    :param entrypoint: Entry-point to consider.
    :param config: Configuration,
        read ``search_parent_directories``, write found parent to ``parent``.
    """
    log.debug("looking for ep %s in %s", entrypoint, root)
    from ._entrypoints import iter_entry_points

    for workdir in walk_potential_roots(root, config.search_parent_directories):
        for ep in iter_entry_points(entrypoint):
            # Skip legacy plugins that are explicitly blocked.
            if ep.value in _BLOCKED_EP_TARGETS:
                continue
            if not match_entrypoint(workdir, ep.name):
                continue
            log.debug("found ep %s in %s", ep, workdir)
            config.parent = workdir  # remember where the match was found
            yield ep

        </content>
    </file>
    <file>
        <name>fallbacks.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\fallbacks.py</path>
        <content>
from __future__ import annotations

import logging
import os

from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from . import _types as _t
from . import Configuration
from .integration import data_from_mime
from .version import ScmVersion
from .version import meta
from .version import tag_to_version

log = logging.getLogger(__name__)

# Sentinel default used when PKG-INFO carries no usable "Version" field.
_UNKNOWN = "UNKNOWN"


def parse_pkginfo(root: _t.PathT, config: Configuration) -> ScmVersion | None:
    """Read a preformatted version from an sdist ``PKG-INFO``, if present."""
    pkginfo = Path(root) / "PKG-INFO"
    log.debug("pkginfo %s", pkginfo)
    data = data_from_mime(pkginfo)
    version = data.get("Version", _UNKNOWN)
    if version == _UNKNOWN:
        return None
    return meta(version, preformatted=True, config=config)


def fallback_version(root: _t.PathT, config: Configuration) -> ScmVersion | None:
    """Derive a version without SCM metadata.

    Tries the parent-directory-prefix scheme first, then the configured
    ``fallback_version``; returns ``None`` when neither applies.
    """
    prefix = config.parentdir_prefix_version
    if prefix is not None:
        _, parent_name = os.path.split(os.path.abspath(root))
        if parent_name.startswith(prefix):
            # The directory name itself encodes the version after the prefix.
            version = tag_to_version(parent_name[len(prefix):], config)
            if version is not None:
                return meta(str(version), preformatted=True, config=config)
    if config.fallback_version is not None:
        log.debug("FALLBACK %s", config.fallback_version)
        return meta(config.fallback_version, preformatted=True, config=config)
    return None

        </content>
    </file>
    <file>
        <name>git.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\git.py</path>
        <content>
from __future__ import annotations

import dataclasses
import logging
import os
import re
import shlex
import sys
import warnings

from datetime import date
from datetime import datetime
from datetime import timezone
from os.path import samefile
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Callable
from typing import Sequence

from . import Configuration
from . import _types as _t
from . import discover
from ._run_cmd import CompletedProcess as _CompletedProcess
from ._run_cmd import require_command as _require_command
from ._run_cmd import run as _run
from .integration import data_from_mime
from .scm_workdir import Workdir
from .version import ScmVersion
from .version import meta
from .version import tag_to_version

if TYPE_CHECKING:
    from . import hg_git
log = logging.getLogger(__name__)

# Extracts tag names from the "ref-names" field of .git_archival.txt,
# e.g. "HEAD -> main, tag: v1.0" -> "v1.0".
REF_TAG_RE = re.compile(r"(?<=\btag: )([^,]+)\b")
# Marker left behind when `git archive` did not expand %(describe...).
DESCRIBE_UNSUPPORTED = "%(describe"

# If testing command in shell make sure to quote the match argument like
# '*[0-9]*' as it will expand before being sent to git if there are any matching
# files in current directory.
DEFAULT_DESCRIBE = [
    "git",
    "describe",
    "--dirty",
    "--tags",
    "--long",
    "--match",
    "*[0-9]*",
]


def run_git(
    args: Sequence[str | os.PathLike[str]],
    repo: Path,
    *,
    check: bool = False,
    timeout: int | None = None,
) -> _CompletedProcess:
    """Run ``git`` against *repo*'s ``.git`` directory and return the result."""
    command = ["git", "--git-dir", repo / ".git", *args]
    return _run(command, cwd=repo, check=check, timeout=timeout)


class GitWorkdir(Workdir):
    """experimental, may change at any time

    Wrapper around a git worktree that answers version-metadata queries
    (dirty state, branch, head date, node id) via the ``git`` CLI.
    """

    @classmethod
    def from_potential_worktree(cls, wd: _t.PathT) -> GitWorkdir | None:
        # Return a GitWorkdir when wd is the ROOT of a git worktree;
        # None outside a repo or in a subdirectory of the worktree.
        wd = Path(wd).resolve()
        # --show-prefix prints wd's path relative to the worktree root
        # (empty when wd is the root itself); it fails outside a repo.
        real_wd = run_git(["rev-parse", "--show-prefix"], wd).parse_success(parse=str)
        if real_wd is None:
            return None
        else:
            real_wd = real_wd[:-1]  # remove the trailing pathsep

        if not real_wd:
            # Empty prefix: wd already is the worktree root.
            real_wd = os.fspath(wd)
        else:
            # Strip the prefix from the end of wd to recover the root path.
            str_wd = os.fspath(wd)
            assert str_wd.replace("\\", "/").endswith(real_wd)
            # In windows wd contains ``\`` which should be replaced by ``/``
            # for this assertion to work.  Length of string isn't changed by replace
            # ``\\`` is just and escape for `\`
            real_wd = str_wd[: -len(real_wd)]
        log.debug("real root %s", real_wd)
        if not samefile(real_wd, wd):
            # wd is a subdirectory of the worktree, not its root.
            return None

        return cls(Path(real_wd))

    def is_dirty(self) -> bool:
        # Any porcelain output (untracked files excluded) means dirty.
        return run_git(
            ["status", "--porcelain", "--untracked-files=no"], self.path
        ).parse_success(
            parse=bool,
            default=False,
        )

    def get_branch(self) -> str | None:
        # Try rev-parse first; fall back to symbolic-ref when that fails.
        return run_git(
            ["rev-parse", "--abbrev-ref", "HEAD"],
            self.path,
        ).parse_success(
            parse=str,
            error_msg="branch err (abbrev-err)",
        ) or run_git(
            ["symbolic-ref", "--short", "HEAD"],
            self.path,
        ).parse_success(
            parse=str,
            error_msg="branch err (symbolic-ref)",
        )

    def get_head_date(self) -> date | None:
        # Return the committer date of HEAD, or None on failure.
        def parse_timestamp(timestamp_text: str) -> date | None:
            # Old git that doesn't understand %cI echoes the format back.
            if "%c" in timestamp_text:
                log.warning("git too old -> timestamp is %r", timestamp_text)
                return None
            # fromisoformat before 3.11 cannot parse a trailing 'Z'.
            if sys.version_info < (3, 11) and timestamp_text.endswith("Z"):
                timestamp_text = timestamp_text[:-1] + "+00:00"
            return datetime.fromisoformat(timestamp_text).date()

        res = run_git(
            [
                *("-c", "log.showSignature=false"),
                *("log", "-n", "1", "HEAD"),
                "--format=%cI",
            ],
            self.path,
        )
        return res.parse_success(
            parse=parse_timestamp,
            error_msg="logging the iso date for head failed",
        )

    def is_shallow(self) -> bool:
        # A shallow clone is marked by the presence of .git/shallow.
        return self.path.joinpath(".git/shallow").is_file()

    def fetch_shallow(self) -> None:
        # Convert a shallow clone into a full one (may take a while).
        run_git(["fetch", "--unshallow"], self.path, check=True, timeout=240)

    def node(self) -> str | None:
        # Return the abbreviated (7-char) HEAD commit id, or None.
        def _unsafe_short_node(node: str) -> str:
            # Fixed 7-char truncation; not guaranteed unambiguous.
            return node[:7]

        return run_git(
            ["rev-parse", "--verify", "--quiet", "HEAD"], self.path
        ).parse_success(
            parse=_unsafe_short_node,
        )

    def count_all_nodes(self) -> int:
        # Number of commits reachable from HEAD (always at least 1).
        res = run_git(["rev-list", "HEAD"], self.path)
        return res.stdout.count("\n") + 1

    def default_describe(self) -> _CompletedProcess:
        # Run the default describe command (DEFAULT_DESCRIBE minus "git").
        return run_git(DEFAULT_DESCRIBE[1:], self.path)


def warn_on_shallow(wd: GitWorkdir) -> None:
    """experimental, may change at any time

    Emit a warning when *wd* is a shallow clone.
    """
    if not wd.is_shallow():
        return
    warnings.warn(f'"{wd.path}" is shallow and may cause errors')


def fetch_on_shallow(wd: GitWorkdir) -> None:
    """experimental, may change at any time

    Unshallow *wd* (with a warning) when it is a shallow clone.
    """
    if not wd.is_shallow():
        return
    warnings.warn(f'"{wd.path}" was shallow, git fetch was used to rectify')
    wd.fetch_shallow()


def fail_on_shallow(wd: GitWorkdir) -> None:
    """experimental, may change at any time

    Raise ``ValueError`` when *wd* is a shallow clone.
    """
    if not wd.is_shallow():
        return
    raise ValueError(
        f'{wd.path} is shallow, please correct with "git fetch --unshallow"'
    )


def get_working_directory(config: Configuration, root: _t.PathT) -> GitWorkdir | None:
    """
    Return the working directory (``GitWorkdir``) for *root*, or ``None``.
    """
    if config.parent:  # todo broken
        return GitWorkdir.from_potential_worktree(config.parent)

    candidates = discover.walk_potential_roots(
        root, search_parents=config.search_parent_directories
    )
    for candidate in candidates:
        workdir = GitWorkdir.from_potential_worktree(candidate)
        if workdir is not None:
            return workdir

    # Last resort: try root itself directly.
    return GitWorkdir.from_potential_worktree(root)


def parse(
    root: _t.PathT,
    config: Configuration,
    describe_command: str | list[str] | None = None,
    pre_parse: Callable[[GitWorkdir], None] = warn_on_shallow,
) -> ScmVersion | None:
    """
    Parse the git metadata under *root* into an ``ScmVersion``.

    :param pre_parse: experimental pre_parse action, may change at any time
    """
    _require_command("git")
    workdir = get_working_directory(config, root)
    if workdir is None:
        return None
    return _git_parse_inner(
        config, workdir, describe_command=describe_command, pre_parse=pre_parse
    )


def version_from_describe(
    wd: GitWorkdir | hg_git.GitWorkdirHgClient,
    config: Configuration,
    describe_command: _t.CMD_TYPE | None,
) -> ScmVersion | None:
    """Run ``git describe`` (or a configured override) and parse its output.

    :param wd: working directory wrapper used to execute git.
    :param config: configuration; its ``git_describe_command`` overrides
        *describe_command* when set.
    :param describe_command: explicit describe command as a string or argv list.
    :return: the parsed ``ScmVersion`` or ``None`` when describe failed.
    """
    # (removed a stray dead `pass` statement that preceded this logic)
    if config.git_describe_command is not None:
        describe_command = config.git_describe_command

    if describe_command is not None:
        if isinstance(describe_command, str):
            describe_command = shlex.split(describe_command)
            # todo: figure how to ensure git with gitdir gets correctly invoked
        if describe_command[0] == "git":
            describe_res = run_git(describe_command[1:], wd.path)
        else:
            describe_res = _run(describe_command, wd.path)
    else:
        describe_res = wd.default_describe()

    def parse_describe(output: str) -> ScmVersion:
        tag, distance, node, dirty = _git_parse_describe(output)
        return meta(tag=tag, distance=distance, dirty=dirty, node=node, config=config)

    return describe_res.parse_success(parse=parse_describe)


def _git_parse_inner(
    config: Configuration,
    wd: GitWorkdir | hg_git.GitWorkdirHgClient,
    pre_parse: None | (Callable[[GitWorkdir | hg_git.GitWorkdirHgClient], None]) = None,
    describe_command: _t.CMD_TYPE | None = None,
) -> ScmVersion:
    """Build the ``ScmVersion`` for *wd*, annotated with branch and head date.

    Falls back to tag "0.0" plus total commit count when describe fails.
    """
    if pre_parse:
        pre_parse(wd)

    version = version_from_describe(wd, config, describe_command)

    if version is None:
        # If 'git git_describe_command' failed, try to get the information otherwise.
        tag = config.version_cls("0.0")
        node = wd.node()
        if node is None:
            # No commits at all: treat as a dirty initial state.
            distance = 0
            dirty = True
        else:
            distance = wd.count_all_nodes()
            node = "g" + node  # git-style "g" prefix, as `describe` would emit
            dirty = wd.is_dirty()
        version = meta(
            tag=tag, distance=distance, dirty=dirty, node=node, config=config
        )
    branch = wd.get_branch()
    # Prefer the committed head date; fall back to "today" (UTC).
    node_date = wd.get_head_date() or datetime.now(timezone.utc).date()
    return dataclasses.replace(version, branch=branch, node_date=node_date)


def _git_parse_describe(
    describe_output: str,
) -> tuple[str, int, str | None, bool]:
    # 'describe_output' looks e.g. like 'v1.5.0-0-g4060507' or
    # 'v1.15.1rc1-37-g9bd1298-dirty'.
    # It may also just be a bare tag name if this is a tagged commit and we are
    # parsing a .git_archival.txt file.

    if describe_output.endswith("-dirty"):
        dirty = True
        describe_output = describe_output[:-6]
    else:
        dirty = False

    split = describe_output.rsplit("-", 2)
    if len(split) < 3:  # probably a tagged commit
        tag = describe_output
        number = 0
        node = None
    else:
        tag, number_, node = split
        number = int(number_)
    return tag, number, node, dirty


def archival_to_version(
    data: dict[str, str], config: Configuration
) -> ScmVersion | None:
    """Build an ``ScmVersion`` from parsed ``.git_archival.txt`` data."""
    node: str | None
    log.debug("data %s", data)
    archival_describe = data.get("describe-name", DESCRIBE_UNSUPPORTED)
    if DESCRIBE_UNSUPPORTED in archival_describe:
        warnings.warn("git archive did not support describe output")
    else:
        # Preferred path: the archive carried expanded %(describe) output.
        tag, number, node, _ = _git_parse_describe(archival_describe)
        return meta(
            tag,
            config=config,
            distance=number,
            node=node,
        )

    # Fall back to tags recorded in the "ref-names" field.
    for ref in REF_TAG_RE.findall(data.get("ref-names", "")):
        version = tag_to_version(ref, config)
        if version is not None:
            return meta(version, config=config)

    # Last resort: a bare node, if the substitution was actually applied.
    node = data.get("node")
    if node is None:
        return None
    if "$FORMAT" in node.upper():
        warnings.warn("unprocessed git archival found (no export subst applied)")
        return None
    return meta("0.0", node=node, config=config)


def parse_archival(root: _t.PathT, config: Configuration) -> ScmVersion | None:
    """Parse ``<root>/.git_archival.txt`` into an ``ScmVersion``."""
    archival_path = os.path.join(root, ".git_archival.txt")
    return archival_to_version(data_from_mime(archival_path), config=config)

        </content>
    </file>
    <file>
        <name>hg.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\hg.py</path>
        <content>
from __future__ import annotations

import datetime
import logging
import os

from pathlib import Path
from typing import TYPE_CHECKING

from . import Configuration
from ._version_cls import Version
from .integration import data_from_mime
from .scm_workdir import Workdir
from .version import ScmVersion
from .version import meta
from .version import tag_to_version

if TYPE_CHECKING:
    from . import _types as _t

from ._run_cmd import require_command as _require_command
from ._run_cmd import run as _run

# Module-level logger for Mercurial parsing diagnostics.
log = logging.getLogger(__name__)


class HgWorkdir(Workdir):
    """Wrapper around a Mercurial working directory for version queries."""

    @classmethod
    def from_potential_worktree(cls, wd: _t.PathT) -> HgWorkdir | None:
        # `hg root` fails (non-zero) outside a repository.
        res = _run(["hg", "root"], wd)
        if res.returncode:
            return None
        return cls(Path(res.stdout))

    def get_meta(self, config: Configuration) -> ScmVersion | None:
        """Derive an ``ScmVersion`` from the working directory state.

        Returns ``None`` when the revision data cannot be interpreted
        (e.g. very old hg whose output fails to unpack).
        """
        node: str
        tags_str: str
        node_date_str: str
        node, tags_str, node_date_str = self.hg_log(
            ".", "{node}\n{tag}\n{date|shortdate}"
        ).split("\n")

        # TODO: support bookmarks and topics (but nowadays bookmarks are
        # mainly used to emulate Git branches, which is already supported with
        # the dedicated class GitWorkdirHgClient)

        branch, dirty_str, dirty_date = _run(
            ["hg", "id", "-T", "{branch}\n{if(dirty, 1, 0)}\n{date|shortdate}"],
            cwd=self.path,
            check=True,
        ).stdout.split("\n")
        dirty = bool(int(dirty_str))
        # For a dirty workdir the relevant date is "now", not the commit's.
        node_date = datetime.date.fromisoformat(dirty_date if dirty else node_date_str)

        if node == "0" * len(node):
            # All-zero node: fresh repository without any commit yet.
            log.debug("initial node %s", self.path)
            return meta(
                Version("0.0"),
                config=config,
                dirty=dirty,
                branch=branch,
                node_date=node_date,
            )

        # "h" prefix distinguishes hg nodes from git's "g" prefix.
        node = "h" + node[:7]

        tags = tags_str.split()
        if "tip" in tags:
            # tip is not a real tag
            tags.remove("tip")

        if tags:
            # Current revision is tagged: exact release, no distance.
            tag = tag_to_version(tags[0], config)
            if tag:
                return meta(tag, dirty=dirty, branch=branch, config=config)

        try:
            # Otherwise measure the distance from the latest usable tag.
            tag_str = self.get_latest_normalizable_tag()
            if tag_str is None:
                dist = self.get_distance_revs("")
            else:
                dist = self.get_distance_revs(tag_str)

            if tag_str == "null" or tag_str is None:
                tag = Version("0.0")
                dist += 1
            else:
                tag = tag_to_version(tag_str, config=config)
                assert tag is not None

            if self.check_changes_since_tag(tag_str) or dirty:
                return meta(
                    tag,
                    distance=dist,
                    node=node,
                    dirty=dirty,
                    branch=branch,
                    config=config,
                    node_date=node_date,
                )
            else:
                return meta(tag, config=config, node_date=node_date)

        except ValueError as e:
            log.exception("error %s", e)
            pass  # unpacking failed, old hg

        return None

    def hg_log(self, revset: str, template: str) -> str:
        # Run `hg log -r <revset> -T <template>` and return stdout.
        cmd = ["hg", "log", "-r", revset, "-T", template]

        return _run(cmd, cwd=self.path, check=True).stdout

    def get_latest_normalizable_tag(self) -> str | None:
        # Gets all tags containing a '.' (see #229) from oldest to newest
        outlines = self.hg_log(
            revset="ancestors(.) and tag('re:\\.')",
            template="{tags}{if(tags, '\n', '')}",
        ).split()
        if not outlines:
            return None
        tag = outlines[-1].split()[-1]
        return tag

    def get_distance_revs(self, rev1: str, rev2: str = ".") -> int:
        # One '.' is printed per revision; length - 1 == distance.
        revset = f"({rev1}::{rev2})"
        out = self.hg_log(revset, ".")
        return len(out) - 1

    def check_changes_since_tag(self, tag: str | None) -> bool:
        # True when real commits (beyond the tagging commit) happened after *tag*.
        if tag == "0.0" or tag is None:
            return True

        revset = (
            "(branch(.)"  # look for revisions in this branch only
            f" and tag({tag!r})::."  # after the last tag
            # ignore commits that only modify .hgtags and nothing else:
            " and (merge() or file('re:^(?!\\.hgtags).*$'))"
            f" and not tag({tag!r}))"  # ignore the tagged commit itself
        )

        return bool(self.hg_log(revset, "."))


def parse(root: _t.PathT, config: Configuration) -> ScmVersion | None:
    """Parse Mercurial metadata under *root*; handles hg-git mirrors too."""
    _require_command("hg")
    if os.path.exists(os.path.join(root, ".hg/git")):
        # Repository mirrors a git remote (hg-git); prefer git-style parsing.
        res = _run(["hg", "path"], root)
        if not res.returncode:
            for line in res.stdout.split("\n"):
                if not line.startswith("default ="):
                    continue
                path = Path(line.split()[2])
                if path.name.endswith(".git") or (path / ".git").exists():
                    from .git import _git_parse_inner
                    from .hg_git import GitWorkdirHgClient

                    wd_hggit = GitWorkdirHgClient.from_potential_worktree(root)
                    if wd_hggit:
                        return _git_parse_inner(config, wd_hggit)

    wd = HgWorkdir.from_potential_worktree(config.absolute_root)
    return None if wd is None else wd.get_meta(config)


def archival_to_version(data: dict[str, str], config: Configuration) -> ScmVersion:
    """Build an ``ScmVersion`` from parsed ``.hg_archival.txt`` data."""
    log.debug("data %s", data)
    node = data.get("node", "")[:12]
    if node:
        node = "h" + node  # "h" marks a Mercurial node id
    if "tag" in data:
        # Exact tagged revision.
        return meta(data["tag"], config=config)
    if "latesttag" in data:
        # Tagged ancestor plus distance.
        return meta(
            data["latesttag"],
            distance=int(data["latesttagdistance"]),
            node=node,
            branch=data.get("branch"),
            config=config,
        )
    return meta(config.version_cls("0.0"), node=node, config=config)


def parse_archival(root: _t.PathT, config: Configuration) -> ScmVersion:
    """Parse ``<root>/.hg_archival.txt`` into an ``ScmVersion``."""
    archival_path = os.path.join(root, ".hg_archival.txt")
    return archival_to_version(data_from_mime(archival_path), config=config)

        </content>
    </file>
    <file>
        <name>hg_git.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\hg_git.py</path>
        <content>
from __future__ import annotations

import logging
import os

from contextlib import suppress
from datetime import date
from pathlib import Path

from . import _types as _t
from ._run_cmd import CompletedProcess as _CompletedProcess
from ._run_cmd import require_command
from ._run_cmd import run as _run
from .git import GitWorkdir
from .hg import HgWorkdir

log = logging.getLogger(__name__)

# Sentinel returned when the hg-backed "fake describe" cannot produce
# output; mimics a failing `git describe` CompletedProcess.
_FAKE_GIT_DESCRIBE_ERROR = _CompletedProcess(
    "fake git describe output for hg",
    1,
    "<>hg git failed to describe",
)


class GitWorkdirHgClient(GitWorkdir, HgWorkdir):
    """Answers git-style queries through a Mercurial (hg-git) client,
    letting the git parsing code run against an hg checkout that mirrors
    a git repository."""

    COMMAND = "hg"

    @classmethod
    def from_potential_worktree(cls, wd: _t.PathT) -> GitWorkdirHgClient | None:
        # `hg root` fails outside a repository -> None.
        require_command("hg")
        res = _run(["hg", "root"], cwd=wd).parse_success(parse=Path)
        if res is None:
            return None
        return cls(res)

    def is_dirty(self) -> bool:
        # Non-empty {dirty} template output means local changes exist.
        res = _run(["hg", "id", "-T", "{dirty}"], cwd=self.path, check=True)
        return bool(res.stdout)

    def get_branch(self) -> str | None:
        # Bookmarks are hg's closest equivalent of git branches.
        res = _run(["hg", "id", "-T", "{bookmarks}"], cwd=self.path)
        if res.returncode:
            log.info("branch err %s", res)
            return None
        return res.stdout

    def get_head_date(self) -> date | None:
        # Date of the current revision, or None on error.
        return _run('hg log -r . -T "{shortdate(date)}"', cwd=self.path).parse_success(
            parse=date.fromisoformat, error_msg="head date err"
        )

    def is_shallow(self) -> bool:
        # Mercurial clones are never shallow in the git sense.
        return False

    def fetch_shallow(self) -> None:
        # Nothing to do; see is_shallow().
        pass

    def get_hg_node(self) -> str | None:
        # Full hg node id of the current revision, or None on error.
        res = _run('hg log -r . -T "{node}"', cwd=self.path)
        if res.returncode:
            return None
        else:
            return res.stdout

    def _hg2git(self, hg_node: str) -> str | None:
        # Translate an hg node to its git counterpart via .hg/git-mapfile
        # (one "git_node hg_node" pair per line); None when unmapped.
        with suppress(FileNotFoundError):
            with open(os.path.join(self.path, ".hg/git-mapfile")) as map_items:
                for item in map_items:
                    if hg_node in item:
                        git_node, hg_node = item.split()
                        return git_node
        return None

    def node(self) -> str | None:
        # Short git node for the current revision; falls back to the hg node.
        hg_node = self.get_hg_node()
        if hg_node is None:
            return None

        git_node = self._hg2git(hg_node)

        if git_node is None:
            # trying again after hg -> git
            _run(["hg", "gexport"], cwd=self.path)
            git_node = self._hg2git(hg_node)

            if git_node is None:
                log.debug("Cannot get git node so we use hg node %s", hg_node)

                if hg_node == "0" * len(hg_node):
                    # mimic Git behavior
                    return None

                return hg_node

        return git_node[:7]

    def count_all_nodes(self) -> int:
        # One '.' is printed per ancestor revision; the length is the count.
        res = _run(["hg", "log", "-r", "ancestors(.)", "-T", "."], cwd=self.path)
        return len(res.stdout)

    def default_describe(self) -> _CompletedProcess:
        """
        Tentative to reproduce the output of

        `git describe --dirty --tags --long --match *[0-9]*`

        """
        # Version-looking tags on the ancestry line, newest first.
        res = _run(
            [
                "hg",
                "log",
                "-r",
                "(reverse(ancestors(.)) and tag(r're:v?[0-9].*'))",
                "-T",
                "{tags}{if(tags, ' ', '')}",
            ],
            cwd=self.path,
        )
        if res.returncode:
            return _FAKE_GIT_DESCRIBE_ERROR
        hg_tags: list[str] = res.stdout.split()

        if not hg_tags:
            return _FAKE_GIT_DESCRIBE_ERROR

        # Keep only tags that also exist on the git side of the mirror.
        with self.path.joinpath(".hg/git-tags").open() as fp:
            git_tags: dict[str, str] = dict(line.split()[::-1] for line in fp)

        tag: str
        for hg_tag in hg_tags:
            if hg_tag in git_tags:
                tag = hg_tag
                break
        else:
            logging.warning("tag not found hg=%s git=%s", hg_tags, git_tags)
            return _FAKE_GIT_DESCRIBE_ERROR

        # Distance: one '.' per revision between the tag and the current rev.
        res = _run(["hg", "log", "-r", f"'{tag}'::.", "-T", "."], cwd=self.path)
        if res.returncode:
            return _FAKE_GIT_DESCRIBE_ERROR
        distance = len(res.stdout) - 1

        node = self.node()
        assert node is not None
        desc = f"{tag}-{distance}-g{node}"

        if self.is_dirty():
            desc += "-dirty"
        log.debug("faked describe %r", desc)
        return _CompletedProcess(
            ["setuptools-scm", "faked", "describe"],
            returncode=0,
            stdout=desc,
            stderr="",
        )

        </content>
    </file>
    <file>
        <name>integration.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\integration.py</path>
        <content>
from __future__ import annotations

import logging
import textwrap

from pathlib import Path

from . import _types as _t

log = logging.getLogger(__name__)


def data_from_mime(path: _t.PathT, content: None | str = None) -> dict[str, str]:
    """return a mapping from mime/pseudo-mime content
    :param path: path to the mime file
    :param content: content of the mime file, if None, read from path
    :rtype: dict[str, str]

    """
    if content is None:
        content = Path(path).read_text(encoding="utf-8")
    log.debug("mime %s content:\n%s", path, textwrap.indent(content, "    "))

    from email.parser import HeaderParser

    headers = HeaderParser().parsestr(content)
    mapping = dict(headers.items())
    log.debug("mime %s data:\n%s", path, mapping)
    return mapping

        </content>
    </file>
    <file>
        <name>scm_workdir.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\scm_workdir.py</path>
        <content>
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

from ._config import Configuration
from .version import ScmVersion


@dataclass()
class Workdir:
    """Base class for an SCM working directory.

    Concrete SCM backends subclass this and implement ``run_describe``.
    """

    # root path of the working directory
    path: Path

    def run_describe(self, config: Configuration) -> ScmVersion:
        """Produce the parsed version for this working directory."""
        raise NotImplementedError(self.run_describe)

        </content>
    </file>
    <file>
        <name>version.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\version.py</path>
        <content>
from __future__ import annotations

import dataclasses
import logging
import os
import re
import warnings

from datetime import date
from datetime import datetime
from datetime import timezone
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Match

from . import _entrypoints
from . import _modify_version

if TYPE_CHECKING:
    import sys

    if sys.version_info >= (3, 10):
        from typing import Concatenate
        from typing import ParamSpec
    else:
        from typing_extensions import Concatenate
        from typing_extensions import ParamSpec

    _P = ParamSpec("_P")

from typing import TypedDict

from . import _config
from . import _version_cls as _v
from ._version_cls import Version as PkgVersion
from ._version_cls import _VersionT

log = logging.getLogger(__name__)


SEMVER_MINOR = 2
SEMVER_PATCH = 3
SEMVER_LEN = 3


class _TagDict(TypedDict):
    """Result shape of ``_parse_version_tag``."""

    # the extracted version text
    version: str
    # text before the version group within the matched tag
    prefix: str
    # text after the version group within the matched tag
    suffix: str


def _parse_version_tag(
    tag: str | object, config: _config.Configuration
) -> _TagDict | None:
    """Match *tag* against the configured tag regex.

    Returns the version part together with the surrounding prefix/suffix
    text, or None when the tag does not match at all.
    """
    match = config.tag_regex.match(str(tag))
    if not match:
        log.debug("tag %r did not parse", tag)
        return None

    # a single unnamed group is used positionally; otherwise the group
    # must be named "version"
    key: str | int = 1 if len(match.groups()) == 1 else "version"
    full = match.group(0)
    log.debug("%r %r %s", tag, config.tag_regex, match)
    log.debug(
        "key %s data %s, %s, %r", key, match.groupdict(), match.groups(), full
    )
    result = _TagDict(
        version=match.group(key),
        prefix=full[: match.start(key)],
        suffix=full[match.end(key) :],
    )
    log.debug("tag %r parsed to %r", tag, result)
    assert result["version"]
    return result


def callable_or_entrypoint(group: str, callable_or_name: str | Any) -> Any:
    """Return *callable_or_name* itself when callable, otherwise load the
    first entry point of that name in *group* (None when nothing matches)."""
    log.debug("ep %r %r", group, callable_or_name)
    if callable(callable_or_name):
        return callable_or_name

    from ._entrypoints import iter_entry_points

    for entry_point in iter_entry_points(group, callable_or_name):
        log.debug("ep found: %s", entry_point.name)
        return entry_point.load()


def tag_to_version(
    tag: _VersionT | str, config: _config.Configuration
) -> _VersionT | None:
    """
    take a tag that might be prefixed with a keyword and return only the version part
    """
    log.debug("tag %s", tag)

    parsed = _parse_version_tag(tag, config)
    if parsed is None or not parsed.get("version", None):
        warnings.warn(f"tag {tag!r} no version found")
        return None

    version_str = parsed["version"]
    log.debug("version pre parse %s", version_str)

    if suffix := parsed.get("suffix", ""):
        warnings.warn(f"tag {tag!r} will be stripped of its suffix {suffix!r}")

    result: _VersionT = config.version_cls(version_str)
    log.debug("version=%r", result)

    return result


def _source_epoch_or_utc_now() -> datetime:
    if "SOURCE_DATE_EPOCH" in os.environ:
        date_epoch = int(os.environ["SOURCE_DATE_EPOCH"])
        return datetime.fromtimestamp(date_epoch, timezone.utc)
    else:
        return datetime.now(timezone.utc)


@dataclasses.dataclass
class ScmVersion:
    """represents a parsed version from scm"""

    tag: _v.Version | _v.NonNormalizedVersion | str
    """the related tag or preformatted version string"""
    config: _config.Configuration
    """the configuration used to parse the version"""
    distance: int = 0
    """the number of commits since the tag"""
    node: str | None = None
    """the shortened node id"""
    dirty: bool = False
    """whether the working copy had uncommitted changes"""
    preformatted: bool = False
    """whether the version string was preformatted"""
    branch: str | None = None
    """the branch name if any"""
    node_date: date | None = None
    """the date of the commit if available"""
    time: datetime = dataclasses.field(default_factory=_source_epoch_or_utc_now)
    """the current time or source epoch time
    only set for unit-testing version schemes
    for real usage it must be `now(utc)` or `SOURCE_EPOCH`
    """

    @property
    def exact(self) -> bool:
        """returns true checked out exactly on a tag and no local changes apply"""
        return self.distance == 0 and not self.dirty

    def __repr__(self) -> str:
        """compact debug representation (not round-trippable)"""
        return (
            f"<ScmVersion {self.tag} dist={self.distance} "
            f"node={self.node} dirty={self.dirty} branch={self.branch}>"
        )

    def format_with(self, fmt: str, **kw: object) -> str:
        """format a given format string with attributes of this object"""
        return fmt.format(
            time=self.time,
            tag=self.tag,
            distance=self.distance,
            node=self.node,
            dirty=self.dirty,
            branch=self.branch,
            node_date=self.node_date,
            **kw,
        )

    def format_choice(self, clean_format: str, dirty_format: str, **kw: object) -> str:
        """given `clean_format` and `dirty_format`

        choose one based on `self.dirty` and format it using `self.format_with`"""

        return self.format_with(dirty_format if self.dirty else clean_format, **kw)

    def format_next_version(
        self,
        guess_next: Callable[Concatenate[ScmVersion, _P], str],
        fmt: str = "{guessed}.dev{distance}",
        *k: _P.args,
        **kw: _P.kwargs,
    ) -> str:
        """format the next guessed version

        ``guess_next`` receives this ScmVersion plus any extra arguments and
        returns the guessed base version, which is interpolated into *fmt*
        as ``{guessed}`` alongside the usual ``format_with`` fields
        """
        guessed = guess_next(self, *k, **kw)
        return self.format_with(fmt, guessed=guessed)


def _parse_tag(
    tag: _VersionT | str, preformatted: bool, config: _config.Configuration
) -> _VersionT | str:
    """Normalize *tag* to the configured version class.

    Preformatted tags and tags that already are instances of the version
    class pass through unchanged.
    """
    if preformatted or isinstance(tag, config.version_cls):
        return tag
    parsed = tag_to_version(tag, config)
    assert parsed is not None
    return parsed


def meta(
    tag: str | _VersionT,
    *,
    distance: int = 0,
    dirty: bool = False,
    node: str | None = None,
    preformatted: bool = False,
    branch: str | None = None,
    config: _config.Configuration,
    node_date: date | None = None,
) -> ScmVersion:
    """Build an ScmVersion from raw scm facts, parsing *tag* on the way."""
    parsed_version = _parse_tag(tag, preformatted, config)
    log.info("version %s -> %s", tag, parsed_version)
    assert parsed_version is not None, "Can't parse version %s" % tag
    return ScmVersion(
        parsed_version,
        config=config,
        distance=distance,
        node=node,
        dirty=dirty,
        preformatted=preformatted,
        branch=branch,
        node_date=node_date,
    )


def guess_next_version(tag_version: ScmVersion) -> str:
    """Strip the local part of the tag and bump it to the next version."""
    stripped = _modify_version.strip_local(str(tag_version.tag))
    return _modify_version._bump_dev(stripped) or _modify_version._bump_regex(stripped)


def guess_next_dev_version(version: ScmVersion) -> str:
    """Version scheme: exact tags verbatim, otherwise the next guessed
    version with a ``.devN`` suffix."""
    if version.exact:
        return version.format_with("{tag}")
    return version.format_next_version(guess_next_version)


def guess_next_simple_semver(
    version: ScmVersion, retain: int, increment: bool = True
) -> str:
    """Compute a semver-style version keeping *retain* leading components,
    optionally bumping the last retained component."""
    if isinstance(version.tag, _v.Version):
        parts = list(version.tag.release[:retain])
    else:
        try:
            parts = [int(segment) for segment in str(version.tag).split(".")[:retain]]
        except ValueError:
            raise ValueError(f"{version} can't be parsed as numeric version") from None
    # pad to the retained length before bumping ...
    parts.extend([0] * (retain - len(parts)))
    if increment:
        parts[-1] += 1
    # ... then pad out to the full major.minor.patch length
    parts.extend([0] * (SEMVER_LEN - len(parts)))
    return ".".join(map(str, parts))


def simplified_semver_version(version: ScmVersion) -> str:
    """Version scheme: exact tags padded to full semver; otherwise bump the
    minor part on feature branches and the patch part elsewhere."""
    if version.exact:
        return guess_next_simple_semver(version, retain=SEMVER_LEN, increment=False)
    on_feature_branch = version.branch is not None and "feature" in version.branch
    retain = SEMVER_MINOR if on_feature_branch else SEMVER_PATCH
    return version.format_next_version(guess_next_simple_semver, retain=retain)


def release_branch_semver_version(version: ScmVersion) -> str:
    """Version scheme aware of release/maintenance branches.

    When the branch name parses as a version whose major.minor matches the
    tag, the next patch-level version is guessed; otherwise the next minor
    version is guessed.
    """
    if version.exact:
        return version.format_with("{tag}")
    if version.branch is not None:
        # Does the branch name (stripped of namespace) parse as a version?
        branch_ver_data = _parse_version_tag(
            version.branch.split("/")[-1], version.config
        )
        if branch_ver_data is not None:
            branch_ver = branch_ver_data["version"]
            if branch_ver[0] == "v":
                # Allow branches that start with 'v', similar to Version.
                branch_ver = branch_ver[1:]
            # Does the branch version up to the minor part match the tag? If not it
            # might be like, an issue number or something and not a version number, so
            # we only want to use it if it matches.
            tag_ver_up_to_minor = str(version.tag).split(".")[:SEMVER_MINOR]
            branch_ver_up_to_minor = branch_ver.split(".")[:SEMVER_MINOR]
            if branch_ver_up_to_minor == tag_ver_up_to_minor:
                # We're in a release/maintenance branch, next is a patch/rc/beta bump:
                return version.format_next_version(guess_next_version)
    # We're in a development branch, next is a minor bump:
    return version.format_next_version(guess_next_simple_semver, retain=SEMVER_MINOR)


def release_branch_semver(version: ScmVersion) -> str:
    """Deprecated alias for :func:`release_branch_semver_version`."""
    warnings.warn(
        "release_branch_semver is deprecated and will be removed in the future. "
        "Use release_branch_semver_version instead",
        category=DeprecationWarning,
        stacklevel=2,
    )
    return release_branch_semver_version(version)


def only_version(version: ScmVersion) -> str:
    """Scheme that renders just the tag, ignoring distance and dirtiness."""
    return version.format_with("{tag}")


def no_guess_dev_version(version: ScmVersion) -> str:
    """Version scheme that never guesses the next release: exact tags are
    used verbatim, otherwise delegate to ``_dont_guess_next_version``."""
    if version.exact:
        return version.format_with("{tag}")
    return version.format_next_version(_modify_version._dont_guess_next_version)


# matches calendar-version tags such as "21.4.1" or "v2021.04.01.3":
# an optional v/V prefix, a 2- or 4-digit year followed by two dotted
# numeric parts, and an optional trailing ".patch" counter
_DATE_REGEX = re.compile(
    r"""
    ^(?P<date>
        (?P<prefix>[vV]?)
        (?P<year>\d{2}|\d{4})(?:\.\d{1,2}){2})
        (?:\.(?P<patch>\d*))?$
    """,
    re.VERBOSE,
)


def date_ver_match(ver: str) -> Match[str] | None:
    """Match *ver* against the calendar-version pattern above."""
    return _DATE_REGEX.match(ver)


def guess_next_date_ver(
    version: ScmVersion,
    node_date: date | None = None,
    date_fmt: str | None = None,
    version_cls: type | None = None,
) -> str:
    """
    same-day -> patch +1
    other-day -> today

    distance is always added as .devX

    :param node_date: date of the head commit; defaults to today
    :param date_fmt: strptime/strftime pattern; deduced from the tag when None
    :param version_cls: version class used to normalize the result
    """
    match = date_ver_match(str(version.tag))
    if match is None:
        warnings.warn(
            f"{version} does not correspond to a valid versioning date, "
            "assuming legacy version"
        )
        if date_fmt is None:
            date_fmt = "%y.%m.%d"
    else:
        # deduct date format if not provided
        if date_fmt is None:
            date_fmt = "%Y.%m.%d" if len(match.group("year")) == 4 else "%y.%m.%d"
        # keep a leading v/V prefix of the tag in the generated version
        if prefix := match.group("prefix"):
            if not date_fmt.startswith(prefix):
                date_fmt = prefix + date_fmt

    today = version.time.date()
    head_date = node_date or today
    # compute patch
    if match is None:
        tag_date = today
    else:
        tag_date = (
            datetime.strptime(match.group("date"), date_fmt)
            .replace(tzinfo=timezone.utc)
            .date()
        )
    if tag_date == head_date:
        # same-day release: bump the trailing patch counter of the tag
        patch = "0" if match is None else (match.group("patch") or "0")
        patch = int(patch) + 1
    else:
        if tag_date > head_date and match is not None:
            # warn on future times
            warnings.warn(
                f"your previous tag  ({tag_date})"
                f" is ahead your node date ({head_date})"
            )
        patch = 0
    next_version = "{node_date:{date_fmt}}.{patch}".format(
        node_date=head_date, date_fmt=date_fmt, patch=patch
    )
    # rely on the Version object to ensure consistency (e.g. remove leading 0s)
    if version_cls is None:
        version_cls = PkgVersion
    next_version = str(version_cls(next_version))
    return next_version


def calver_by_date(version: ScmVersion) -> str:
    """Calendar-version scheme: clean exact tags pass through, ``release-``
    branches may pin the date, otherwise guess from the node date."""
    if version.exact and not version.dirty:
        return version.format_with("{tag}")
    # TODO: move the release-X check to a new scheme
    if version.branch is not None and version.branch.startswith("release-"):
        parsed = _parse_version_tag(version.branch.split("-")[-1], version.config)
        if parsed is not None:
            candidate = parsed["version"]
            if date_ver_match(candidate):
                return candidate
    return version.format_next_version(
        guess_next_date_ver,
        node_date=version.node_date,
        version_cls=version.config.version_cls,
    )


def get_local_node_and_date(version: ScmVersion) -> str:
    """Local scheme delegating to ``_format_local_with_time`` with a
    day-granular time format."""
    return _modify_version._format_local_with_time(version, time_format="%Y%m%d")


def get_local_node_and_timestamp(version: ScmVersion) -> str:
    """Local scheme delegating to ``_format_local_with_time`` with a
    second-granular time format."""
    return _modify_version._format_local_with_time(version, time_format="%Y%m%d%H%M%S")


def get_local_dirty_tag(version: ScmVersion) -> str:
    """Local scheme: ``+dirty`` for dirty working copies, empty otherwise."""
    return version.format_choice("", "+dirty")


def get_no_local_node(version: ScmVersion) -> str:
    """Local scheme that always yields no local component."""
    return ""


def postrelease_version(version: ScmVersion) -> str:
    """Version scheme using post-releases: ``{tag}.postN`` when not exactly
    on a tag, the plain tag otherwise."""
    fmt = "{tag}" if version.exact else "{tag}.post{distance}"
    return version.format_with(fmt)


def format_version(version: ScmVersion) -> str:
    """Render the final version string.

    Preformatted tags are returned verbatim; otherwise the configured
    version scheme produces the main part and the local scheme appends the
    local part (``+unknown`` when the local scheme yields nothing).
    """
    log.debug("scm version %s", version)
    log.debug("config %s", version.config)
    if version.preformatted:
        assert isinstance(version.tag, str)
        return version.tag
    main_version = _entrypoints._call_version_scheme(
        version, "setuptools_scm.version_scheme", version.config.version_scheme, None
    )
    log.debug("version %s", main_version)
    assert main_version is not None
    local_version = _entrypoints._call_version_scheme(
        version, "setuptools_scm.local_scheme", version.config.local_scheme, "+unknown"
    )
    log.debug("local_version %s", local_version)
    return main_version + local_version

        </content>
    </file>
    <file>
        <name>_cli.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_cli.py</path>
        <content>
from __future__ import annotations

import argparse
import json
import os
import sys

from typing import Any

from setuptools_scm import Configuration
from setuptools_scm._file_finders import find_files
from setuptools_scm._get_version_impl import _get_version
from setuptools_scm.discover import walk_potential_roots


def main(args: list[str] | None = None) -> int:
    """CLI entry point: load the configuration, compute the version and
    dispatch to :func:`command` for output.

    :param args: argv-style argument list, default: ``sys.argv[1:]``
    :return: process exit code
    :raises SystemExit: when no version can be determined
    """
    opts = _get_cli_opts(args)
    inferred_root: str = opts.root or "."

    pyproject = opts.config or _find_pyproject(inferred_root)

    try:
        config = Configuration.from_file(
            pyproject,
            root=(os.path.abspath(opts.root) if opts.root is not None else None),
        )
    except (LookupError, FileNotFoundError) as ex:
        # no pyproject.toml OR no [tool.setuptools_scm]
        print(
            f"Warning: could not use {os.path.relpath(pyproject)},"
            " using default configuration.\n"
            f" Reason: {ex}.",
            file=sys.stderr,
        )
        config = Configuration(root=inferred_root)

    version = _get_version(
        config, force_write_version_files=opts.force_write_version_files
    )
    if version is None:
        # bugfix: SystemExit with two arguments would set the exit "code"
        # to a tuple and print its repr; format a single message instead
        raise SystemExit(f"ERROR: no version found for {opts}")
    if opts.strip_dev:
        version = version.partition(".dev")[0]

    return command(opts, version, config)


def _get_cli_opts(args: list[str] | None) -> argparse.Namespace:
    prog = "python -m setuptools_scm"
    desc = "Print project version according to SCM metadata"
    parser = argparse.ArgumentParser(prog, description=desc)
    # By default, help for `--help` starts with lower case, so we keep the pattern:
    parser.add_argument(
        "-r",
        "--root",
        default=None,
        help='directory managed by the SCM, default: inferred from config file, or "."',
    )
    parser.add_argument(
        "-c",
        "--config",
        default=None,
        metavar="PATH",
        help="path to 'pyproject.toml' with setuptools_scm config, "
        "default: looked up in the current or parent directories",
    )
    parser.add_argument(
        "--strip-dev",
        action="store_true",
        help="remove the dev/local parts of the version before printing the version",
    )
    parser.add_argument(
        "-N",
        "--no-version",
        action="store_true",
        help="do not include package version in the output",
    )
    output_formats = ["json", "plain", "key-value"]
    parser.add_argument(
        "-f",
        "--format",
        type=str.casefold,
        default="plain",
        help="specify output format",
        choices=output_formats,
    )
    parser.add_argument(
        "-q",
        "--query",
        type=str.casefold,
        nargs="*",
        help="display setuptools_scm settings according to query, "
        "e.g. dist_name, do not supply an argument in order to "
        "print a list of valid queries.",
    )
    parser.add_argument(
        "--force-write-version-files",
        action="store_true",
        help="trigger to write the content of the version files\n"
        "its recommended to use normal/editable installation instead)",
    )
    sub = parser.add_subparsers(title="extra commands", dest="command", metavar="")
    # We avoid `metavar` to prevent printing repetitive information
    desc = "List information about the package, e.g. included files"
    sub.add_parser("ls", help=desc[0].lower() + desc[1:], description=desc)
    return parser.parse_args(args)


# flake8: noqa: C901
def command(opts: argparse.Namespace, version: str, config: Configuration) -> int:
    data: dict[str, Any] = {}

    if opts.command == "ls":
        opts.query = ["files"]

    if opts.query == []:
        opts.no_version = True
        sys.stderr.write("Available queries:\n\n")
        opts.query = ["queries"]
        data["queries"] = ["files", *config.__dataclass_fields__]

    if opts.query is None:
        opts.query = []

    if not opts.no_version:
        data["version"] = version

    if "files" in opts.query:
        data["files"] = find_files(config.root)

    for q in opts.query:
        if q in ["files", "queries", "version"]:
            continue

        try:
            if q.startswith("_"):
                raise AttributeError()
            data[q] = getattr(config, q)
        except AttributeError:
            sys.stderr.write(f"Error: unknown query: '{q}'\n")
            return 1

    if opts.format == "json":
        print(json.dumps(data, indent=2))

    if opts.format == "plain":
        _print_plain(data)

    if opts.format == "key-value":
        _print_key_value(data)

    return 0


def _print_plain(data: dict[str, Any]) -> None:
    version = data.pop("version", None)
    if version:
        print(version)
    files = data.pop("files", [])
    for file_ in files:
        print(file_)
    queries = data.pop("queries", [])
    for query in queries:
        print(query)
    if data:
        print("\n".join(data.values()))


def _print_key_value(data: dict[str, Any]) -> None:
    for key, value in data.items():
        if isinstance(value, str):
            print(f"{key} = {value}")
        else:
            str_value = "\n  ".join(value)
            print(f"{key} = {str_value}")


def _find_pyproject(parent: str) -> str:
    """Search *parent* and its ancestors for a ``pyproject.toml``.

    Returns the default file name (absolute) when none is found, so the
    caller hits the usual "missing file" errors later.
    """
    for candidate_dir in walk_potential_roots(os.path.abspath(parent)):
        candidate = os.path.join(candidate_dir, "pyproject.toml")
        if os.path.isfile(candidate):
            return candidate

    return os.path.abspath("pyproject.toml")

        </content>
    </file>
    <file>
        <name>_config.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_config.py</path>
        <content>
"""configuration"""

from __future__ import annotations

import dataclasses
import os
import re
import warnings

from pathlib import Path
from typing import Any
from typing import Pattern
from typing import Protocol

from . import _log
from . import _types as _t
from ._integration.pyproject_reading import (
    get_args_for_pyproject as _get_args_for_pyproject,
)
from ._integration.pyproject_reading import read_pyproject as _read_pyproject
from ._overrides import read_toml_overrides
from ._version_cls import Version as _Version
from ._version_cls import _validate_version_cls
from ._version_cls import _VersionT

log = _log.log.getChild("config")

DEFAULT_TAG_REGEX = re.compile(
    r"^(?:[\w-]+-)?(?P<version>[vV]?\d+(?:\.\d+){0,2}[^\+]*)(?:\+.*)?$"
)
"""default tag regex that tries to match PEP440 style versions
with prefix consisting of dashed words"""

DEFAULT_VERSION_SCHEME = "guess-next-dev"
DEFAULT_LOCAL_SCHEME = "node-and-date"


def _check_tag_regex(value: str | Pattern[str] | None) -> Pattern[str]:
    if not value:
        regex = DEFAULT_TAG_REGEX
    else:
        regex = re.compile(value)

    group_names = regex.groupindex.keys()
    if regex.groups == 0 or (regex.groups > 1 and "version" not in group_names):
        warnings.warn(
            "Expected tag_regex to contain a single match group or a group named"
            " 'version' to identify the version part of any tag."
        )

    return regex


class ParseFunction(Protocol):
    """Callable signature for custom ``parse`` hooks: given a root path and
    the configuration, return a parsed version or None."""

    def __call__(
        self, root: _t.PathT, *, config: Configuration
    ) -> _t.SCMVERSION | None: ...


def _check_absolute_root(root: _t.PathT, relative_to: _t.PathT | None) -> str:
    """Resolve *root* against *relative_to* and return an absolute path.

    :param root: scm root, possibly relative to *relative_to*
    :param relative_to: reference file (e.g. a pyproject.toml path); a
        directory is tolerated with a warning and joined directly
    """
    log.debug("check absolute root=%s relative_to=%s", root, relative_to)
    if relative_to:
        if (
            os.path.isabs(root)
            and os.path.isabs(relative_to)
            and not os.path.commonpath([root, relative_to]) == root
        ):
            # an absolute root that is not a parent of relative_to is
            # suspicious - warn, then let the join below proceed anyway
            warnings.warn(
                f"absolute root path '{root}' overrides relative_to '{relative_to}'"
            )
        if os.path.isdir(relative_to):
            warnings.warn(
                "relative_to is expected to be a file,"
                f" its the directory {relative_to}\n"
                "assuming the parent directory was passed"
            )
            log.debug("dir %s", relative_to)
            root = os.path.join(relative_to, root)
        else:
            log.debug("file %s", relative_to)
            root = os.path.join(os.path.dirname(relative_to), root)
    return os.path.abspath(root)


@dataclasses.dataclass
class Configuration:
    """Global configuration model"""

    # reference file that a relative `root` is resolved against
    # (see _check_absolute_root); typically the pyproject.toml path
    relative_to: _t.PathT | None = None
    root: _t.PathT = "."
    version_scheme: _t.VERSION_SCHEME = DEFAULT_VERSION_SCHEME
    local_scheme: _t.VERSION_SCHEME = DEFAULT_LOCAL_SCHEME
    tag_regex: Pattern[str] = DEFAULT_TAG_REGEX
    parentdir_prefix_version: str | None = None
    fallback_version: str | None = None
    fallback_root: _t.PathT = "."
    write_to: _t.PathT | None = None
    write_to_template: str | None = None
    version_file: _t.PathT | None = None
    version_file_template: str | None = None
    # optional custom parse hook replacing entry-point based scm discovery
    parse: ParseFunction | None = None
    git_describe_command: _t.CMD_TYPE | None = None
    dist_name: str | None = None
    version_cls: type[_VersionT] = _Version
    search_parent_directories: bool = False

    parent: _t.PathT | None = None

    @property
    def absolute_root(self) -> str:
        """the configured root resolved to an absolute path"""
        return _check_absolute_root(self.root, self.relative_to)

    @classmethod
    def from_file(
        cls,
        name: str | os.PathLike[str] = "pyproject.toml",
        dist_name: str | None = None,
        _require_section: bool = True,
        **kwargs: Any,
    ) -> Configuration:
        """
        Read Configuration from pyproject.toml (or similar).
        Raises exceptions when file is not found or toml is
        not installed or the file has invalid format or does
        not contain the [tool.setuptools_scm] section.
        """

        pyproject_data = _read_pyproject(Path(name), require_section=_require_section)
        args = _get_args_for_pyproject(pyproject_data, dist_name, kwargs)

        # environment/toml overrides win over file values
        args.update(read_toml_overrides(args["dist_name"]))
        relative_to = args.pop("relative_to", name)
        return cls.from_data(relative_to=relative_to, data=args)

    @classmethod
    def from_data(
        cls, relative_to: str | os.PathLike[str], data: dict[str, Any]
    ) -> Configuration:
        """
        given configuration data
        create a config instance after validating tag regex/version class
        """
        tag_regex = _check_tag_regex(data.pop("tag_regex", None))
        version_cls = _validate_version_cls(
            data.pop("version_cls", None), data.pop("normalize", True)
        )
        return cls(
            relative_to=relative_to,
            version_cls=version_cls,
            tag_regex=tag_regex,
            **data,
        )
        </content>
    </file>
    <file>
        <name>_entrypoints.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_entrypoints.py</path>
        <content>
from __future__ import annotations

import sys

from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Iterator
from typing import cast
from typing import overload

from . import _log
from . import version

if TYPE_CHECKING:
    from . import _types as _t
    from ._config import Configuration
    from ._config import ParseFunction


from importlib.metadata import EntryPoint as EntryPoint

if sys.version_info[:2] < (3, 10):
    from importlib.metadata import entry_points as legacy_entry_points

    # Older importlib.metadata exposes entry points as a mapping of
    # group -> list (see the indexing in entry_points below) and lacks the
    # EntryPoints.select API - provide a minimal shim with the subset used
    # in this module.
    class EntryPoints:
        _groupdata: list[EntryPoint]

        def __init__(self, groupdata: list[EntryPoint]) -> None:
            self._groupdata = groupdata

        def select(self, name: str) -> EntryPoints:
            # mirror EntryPoints.select(name=...) filtering
            return EntryPoints([x for x in self._groupdata if x.name == name])

        def __iter__(self) -> Iterator[EntryPoint]:
            return iter(self._groupdata)

    def entry_points(group: str) -> EntryPoints:
        # wrap the legacy mapping lookup in the shim type
        return EntryPoints(legacy_entry_points()[group])

else:
    from importlib.metadata import EntryPoints
    from importlib.metadata import entry_points


log = _log.log.getChild("entrypoints")


def version_from_entrypoint(
    config: Configuration, *, entrypoint: str, root: _t.PathT
) -> version.ScmVersion | None:
    """Try every matching entry point in *entrypoint* against *root* and
    return the first parsed version, or None when none succeeds."""
    from .discover import iter_matching_entrypoints

    log.debug("version_from_ep %s in %s", entrypoint, root)
    for ep in iter_matching_entrypoints(root, entrypoint, config):
        parse_fn: ParseFunction = ep.load()
        parsed: version.ScmVersion | None = parse_fn(root, config=config)
        log.debug("%s found %r", ep, parsed)
        if parsed is not None:
            return parsed
    return None


def iter_entry_points(group: str, name: str | None = None) -> Iterator[EntryPoint]:
    eps: EntryPoints = entry_points(group=group)
    res = eps if name is None else eps.select(name=name)

    return iter(res)


def _get_ep(group: str, name: str) -> Any | None:
    """Load the first entry point called *name* within *group*, or None."""
    for ep in iter_entry_points(group, name):
        log.debug("ep found: %s", ep.name)
        # first match wins
        return ep.load()
    # no match: the previous for/else clause only returned None, which a
    # plain fallthrough return expresses without the misleading `else`
    return None


def _get_from_object_reference_str(path: str, group: str) -> Any | None:
    # todo: remove for importlib native spelling
    ep = EntryPoint(path, path, group)
    try:
        return ep.load()
    except (AttributeError, ModuleNotFoundError):
        return None


def _iter_version_schemes(
    entrypoint: str,
    scheme_value: _t.VERSION_SCHEMES,
    _memo: set[object] | None = None,
) -> Iterator[Callable[[version.ScmVersion], str]]:
    """Yield the scheme callables a configured value resolves to.

    Strings are resolved via the entry-point group or as object references;
    lists/tuples are flattened recursively with *_memo* guarding against
    duplicates and cycles.
    """
    if _memo is None:
        _memo = set()
    if isinstance(scheme_value, str):
        # resolve the name to a callable (or another nested scheme value)
        scheme_value = cast(
            "_t.VERSION_SCHEMES",
            _get_ep(entrypoint, scheme_value)
            or _get_from_object_reference_str(scheme_value, entrypoint),
        )

    if isinstance(scheme_value, (list, tuple)):
        for variant in scheme_value:
            if variant not in _memo:
                _memo.add(variant)
                yield from _iter_version_schemes(entrypoint, variant, _memo=_memo)
    elif callable(scheme_value):
        yield scheme_value


# overloads: the result is only None when the supplied default is None
@overload
def _call_version_scheme(
    version: version.ScmVersion,
    entrypoint: str,
    given_value: _t.VERSION_SCHEMES,
    default: str,
) -> str: ...


@overload
def _call_version_scheme(
    version: version.ScmVersion,
    entrypoint: str,
    given_value: _t.VERSION_SCHEMES,
    default: None,
) -> str | None: ...


def _call_version_scheme(
    version: version.ScmVersion,
    entrypoint: str,
    given_value: _t.VERSION_SCHEMES,
    default: str | None,
) -> str | None:
    """Call the resolved scheme callables in order and return the first
    non-None result, falling back to *default*."""
    for scheme in _iter_version_schemes(entrypoint, given_value):
        result = scheme(version)
        if result is not None:
            return result
    return default

        </content>
    </file>
    <file>
        <name>_get_version_impl.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_get_version_impl.py</path>
        <content>
from __future__ import annotations

import logging
import re
import warnings

from pathlib import Path
from typing import Any
from typing import NoReturn
from typing import Pattern

from . import _config
from . import _entrypoints
from . import _run_cmd
from . import _types as _t
from ._config import Configuration
from ._overrides import _read_pretended_version_for
from ._version_cls import _validate_version_cls
from .version import ScmVersion
from .version import format_version as _format_version

EMPTY_TAG_REGEX_DEPRECATION = DeprecationWarning(
    "empty regex for tag regex is invalid, using default"
)

_log = logging.getLogger(__name__)


def parse_scm_version(config: Configuration) -> ScmVersion | None:
    """Parse the version from the scm via the configured ``parse`` hook or,
    absent one, the ``setuptools_scm.parse_scm`` entry points.

    :return: the parsed version, or None when the scm command is missing
    :raises TypeError: when a custom parse hook returns a non-ScmVersion
    """
    try:
        if config.parse is not None:
            parse_result = config.parse(config.absolute_root, config=config)
            if parse_result is not None and not isinstance(parse_result, ScmVersion):
                # bugfix: the message previously interpolated the builtin
                # `str` ({str!r}) instead of the offending parse result
                raise TypeError(
                    f"version parse result was {parse_result!r}\n"
                    "please return a parsed version (ScmVersion)"
                )
            return parse_result
        else:
            return _entrypoints.version_from_entrypoint(
                config,
                entrypoint="setuptools_scm.parse_scm",
                root=config.absolute_root,
            )
    except _run_cmd.CommandNotFoundError as e:
        _log.exception("command %s not found while parsing the scm, using fallbacks", e)
        return None


def parse_fallback_version(config: Configuration) -> ScmVersion | None:
    """Try the ``parse_scm_fallback`` entrypoints at the fallback root."""
    fallback_entrypoint = "setuptools_scm.parse_scm_fallback"
    return _entrypoints.version_from_entrypoint(
        config, entrypoint=fallback_entrypoint, root=config.fallback_root
    )


def parse_version(config: Configuration) -> ScmVersion | None:
    """Determine the version: env override first, then the SCM, then fallbacks."""
    parsed = _read_pretended_version_for(config)
    if not parsed:
        parsed = parse_scm_version(config)
    if not parsed:
        parsed = parse_fallback_version(config)
    return parsed


def write_version_files(
    config: Configuration, version: str, scm_version: ScmVersion
) -> None:
    """Persist *version* to the configured output files.

    Handles both the legacy ``write_to`` target and the newer
    ``version_file`` target (resolved next to ``config.relative_to``).
    """
    if config.write_to is not None:
        from ._integration.dump_version import dump_version

        dump_version(
            root=config.root,
            version=version,
            scm_version=scm_version,
            write_to=config.write_to,
            template=config.write_to_template,
        )
    if config.version_file:
        from ._integration.dump_version import write_version_to_path

        version_file = Path(config.version_file)
        assert not version_file.is_absolute(), f"{version_file=}"
        # todo: use a better name than fallback root
        assert config.relative_to is not None
        destination = Path(config.relative_to).parent / version_file
        write_version_to_path(
            destination,
            template=config.version_file_template,
            version=version,
            scm_version=scm_version,
        )


def _get_version(
    config: Configuration, force_write_version_files: bool | None = None
) -> str | None:
    """Parse, format and (optionally) write out the project version.

    Returns ``None`` when no version could be determined.  A ``None``
    value for *force_write_version_files* is deprecated and treated as
    the legacy ``True``.
    """
    scm_version = parse_version(config)
    if scm_version is None:
        return None
    formatted = _format_version(scm_version)

    if force_write_version_files is None:
        force_write_version_files = True
        warnings.warn(
            "force_write_version_files ought to be set,"
            " presuming the legacy True value",
            DeprecationWarning,
        )
    if force_write_version_files:
        write_version_files(config, version=formatted, scm_version=scm_version)
    return formatted


def _version_missing(config: Configuration) -> NoReturn:
    raise LookupError(
        f"setuptools-scm was unable to detect version for {config.absolute_root}.\n\n"
        "Make sure you're either building from a fully intact git repository "
        "or PyPI tarballs. Most other sources (such as GitHub's tarballs, a "
        "git checkout without the .git folder) don't contain the necessary "
        "metadata and will not work.\n\n"
        "For example, if you're using pip, instead of "
        "https://github.com/user/proj/archive/master.zip "
        "use git+https://github.com/user/proj.git#egg=proj"
    )


def get_version(
    root: _t.PathT = ".",
    version_scheme: _t.VERSION_SCHEME = _config.DEFAULT_VERSION_SCHEME,
    local_scheme: _t.VERSION_SCHEME = _config.DEFAULT_LOCAL_SCHEME,
    write_to: _t.PathT | None = None,
    write_to_template: str | None = None,
    version_file: _t.PathT | None = None,
    version_file_template: str | None = None,
    relative_to: _t.PathT | None = None,
    tag_regex: str | Pattern[str] = _config.DEFAULT_TAG_REGEX,
    parentdir_prefix_version: str | None = None,
    fallback_version: str | None = None,
    fallback_root: _t.PathT = ".",
    parse: Any | None = None,
    git_describe_command: _t.CMD_TYPE | None = None,
    dist_name: str | None = None,
    version_cls: Any | None = None,
    normalize: bool = True,
    search_parent_directories: bool = False,
) -> str:
    """
    Return the detected project version as a string.

    If supplied, relative_to should be a file from which root may
    be resolved. Typically called by a script or module that is not
    in the root of the repository to direct setuptools_scm to the
    root of the repository by supplying ``__file__``.

    Raises ``LookupError`` when no version can be detected.
    """

    # NOTE: ``Configuration(**locals())`` below forwards *every* local name,
    # so no helper variables may be introduced before it and ``normalize``
    # must be deleted (it is not a Configuration field).
    version_cls = _validate_version_cls(version_cls, normalize)
    del normalize
    tag_regex = parse_tag_regex(tag_regex)
    config = Configuration(**locals())
    # legacy entry point: always writes the configured version files
    maybe_version = _get_version(config, force_write_version_files=True)

    if maybe_version is None:
        _version_missing(config)
    return maybe_version


def parse_tag_regex(tag_regex: str | Pattern[str]) -> Pattern[str]:
    """Return *tag_regex* as a compiled pattern.

    An empty string is deprecated and mapped to the default tag regex
    (emitting a ``DeprecationWarning``).
    """
    if not isinstance(tag_regex, str):
        return tag_regex
    if tag_regex == "":
        warnings.warn(EMPTY_TAG_REGEX_DEPRECATION)
        return _config.DEFAULT_TAG_REGEX
    return re.compile(tag_regex)

        </content>
    </file>
    <file>
        <name>_log.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_log.py</path>
        <content>
"""
logging helpers, supports vendoring
"""

from __future__ import annotations

import contextlib
import logging
import os
import sys

from typing import IO
from typing import Iterator
from typing import Mapping

log = logging.getLogger(__name__.rsplit(".", 1)[0])
log.propagate = False


class AlwaysStdErrHandler(logging.StreamHandler):  # type: ignore[type-arg]
    """A stream handler that is pinned to the live ``sys.stderr``.

    The ``stream`` property re-reads ``sys.stderr`` on every access, so
    the handler keeps working when stderr is replaced (e.g. by test
    runners that capture output).
    """

    def __init__(self) -> None:
        # bug fix: was misspelled ``__init___`` (three underscores), so it
        # never overrode the base constructor
        super().__init__(sys.stderr)

    @property  # type: ignore [override]
    def stream(self) -> IO[str]:
        # always resolve the current sys.stderr, never a captured stream
        return sys.stderr

    @stream.setter
    def stream(self, value: IO[str]) -> None:
        # the base constructor assigns the stream; only sys.stderr is ever
        # acceptable for this handler
        assert value is sys.stderr


def make_default_handler() -> logging.Handler:
    """Build the module's default handler: rich if available, plain otherwise."""
    try:
        from rich.console import Console
        from rich.logging import RichHandler
    except ImportError:
        fallback = AlwaysStdErrHandler()
        fallback.setFormatter(logging.Formatter("%(levelname)s %(name)s %(message)s"))
        return fallback
    return RichHandler(console=Console(stderr=True))


_default_handler = make_default_handler()

log.addHandler(_default_handler)


def _default_log_level(_env: Mapping[str, str] = os.environ) -> int:
    val: str | None = _env.get("SETUPTOOLS_SCM_DEBUG")
    return logging.WARN if val is None else logging.DEBUG


log.setLevel(_default_log_level())


@contextlib.contextmanager
def defer_to_pytest() -> Iterator[None]:
    """Temporarily hand this package's log records over to pytest.

    Enables propagation to the root logger, lifts the level filter and
    detaches the module handler so pytest's caplog sees everything.
    The previous state is restored on exit.
    """
    log.propagate = True
    old_level = log.level
    log.setLevel(logging.NOTSET)
    log.removeHandler(_default_handler)
    try:
        yield
    finally:
        # re-attach the handler before tightening propagation/level so no
        # configuration window is left without any sink
        log.addHandler(_default_handler)
        log.propagate = False
        log.setLevel(old_level)


@contextlib.contextmanager
def enable_debug(handler: logging.Handler = _default_handler) -> Iterator[None]:
    """Force DEBUG logging on *handler* (default: the module handler).

    Both the logger level and the handler level are lowered to DEBUG and
    restored afterwards; a custom handler is attached for the duration
    and removed again on exit.
    """
    log.addHandler(handler)
    old_level = log.level
    log.setLevel(logging.DEBUG)
    old_handler_level = handler.level
    handler.setLevel(logging.DEBUG)
    try:
        yield
    finally:
        log.setLevel(old_level)
        handler.setLevel(old_handler_level)
        # addHandler above is idempotent for _default_handler, so only a
        # foreign handler must actually be detached
        if handler is not _default_handler:
            log.removeHandler(handler)

        </content>
    </file>
    <file>
        <name>_modify_version.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_modify_version.py</path>
        <content>
from __future__ import annotations

import re

from . import _types as _t


def strip_local(version_string: str) -> str:
    """Drop the local part (everything from ``+`` on) of a version string."""
    public, _, _local = version_string.partition("+")
    return public


def _add_post(version: str) -> str:
    if "post" in version:
        raise ValueError(
            f"{version} already is a post release, refusing to guess the update"
        )
    return f"{version}.post1"


def _bump_dev(version: str) -> str | None:
    if ".dev" not in version:
        return None

    prefix, tail = version.rsplit(".dev", 1)
    if tail != "0":
        raise ValueError(
            "choosing custom numbers for the `.devX` distance "
            "is not supported.\n "
            f"The {version} can't be bumped\n"
            "Please drop the tag or create a new supported one ending in .dev0"
        )
    return prefix


def _bump_regex(version: str) -> str:
    match = re.match(r"(.*?)(\d+)$", version)
    if match is None:
        raise ValueError(
            f"{version} does not end with a number to bump, "
            "please correct or use a custom version scheme"
        )
    else:
        prefix, tail = match.groups()
        return f"{prefix}{int(tail) + 1}"


def _format_local_with_time(version: _t.SCMVERSION, time_format: str) -> str:
    if version.exact or version.node is None:
        return version.format_choice(
            "", "+d{time:{time_format}}", time_format=time_format
        )
    else:
        return version.format_choice(
            "+{node}", "+{node}.d{time:{time_format}}", time_format=time_format
        )


def _dont_guess_next_version(tag_version: _t.SCMVERSION) -> str:
    """Next version without guessing: drop ``.dev0`` or append ``.post1``."""
    public = strip_local(str(tag_version.tag))
    bumped = _bump_dev(public)
    if bumped:
        return bumped
    return _add_post(public)

        </content>
    </file>
    <file>
        <name>_overrides.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_overrides.py</path>
        <content>
from __future__ import annotations

import os
import re

from typing import Any

from . import _config
from . import _log
from . import version
from ._integration.toml import load_toml_or_inline_map

log = _log.log.getChild("overrides")

PRETEND_KEY = "SETUPTOOLS_SCM_PRETEND_VERSION"
PRETEND_KEY_NAMED = PRETEND_KEY + "_FOR_{name}"


def read_named_env(
    *, tool: str = "SETUPTOOLS_SCM", name: str, dist_name: str | None
) -> str | None:
    """Look up ``{tool}_{name}`` in the environment.

    When *dist_name* is given, a distribution specific variable
    ``{tool}_{name}_FOR_{DIST}`` (PEP 503 normalized, uppercased with
    ``-`` turned into ``_``) takes precedence over the generic one.
    """
    if dist_name is not None:
        # PEP 503 normalization, then environment-variable friendly casing
        canonical = re.sub(r"[-_.]+", "-", dist_name)
        env_suffix = canonical.replace("-", "_").upper()
        specific = os.environ.get(f"{tool}_{name}_FOR_{env_suffix}")
        if specific is not None:
            return specific
    return os.environ.get(f"{tool}_{name}")


def _read_pretended_version_for(
    config: _config.Configuration,
) -> version.ScmVersion | None:
    """Read an overridden version from the environment.

    Tries ``SETUPTOOLS_SCM_PRETEND_VERSION``
    and ``SETUPTOOLS_SCM_PRETEND_VERSION_FOR_$UPPERCASE_DIST_NAME``
    (the dist specific variable wins).  Returns ``None`` when neither
    is set or the value is empty.
    """
    log.debug("dist name: %s", config.dist_name)

    pretended = read_named_env(name="PRETEND_VERSION", dist_name=config.dist_name)

    if pretended:
        # we use meta here since the pretended version
        # must adhere to the pep to begin with
        return version.meta(tag=pretended, preformatted=True, config=config)
    else:
        return None


def read_toml_overrides(dist_name: str | None) -> dict[str, Any]:
    """Load override settings from the ``OVERRIDES`` environment variable."""
    raw = read_named_env(name="OVERRIDES", dist_name=dist_name)
    return load_toml_or_inline_map(raw)

        </content>
    </file>
    <file>
        <name>_run_cmd.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_run_cmd.py</path>
        <content>
from __future__ import annotations

import os
import shlex
import subprocess
import textwrap
import warnings

from typing import TYPE_CHECKING
from typing import Callable
from typing import Final
from typing import Mapping
from typing import Sequence
from typing import TypeVar
from typing import overload

from . import _log
from . import _types as _t

if TYPE_CHECKING:
    BaseCompletedProcess = subprocess.CompletedProcess[str]
else:
    BaseCompletedProcess = subprocess.CompletedProcess

# pick 40 seconds
# unfortunately github CI for windows sometimes needs
# up to 30 seconds to start a command


def _get_timeout(env: Mapping[str, str]) -> int:
    return int(env.get("SETUPTOOLS_SCM_SUBPROCESS_TIMEOUT") or 40)


BROKEN_TIMEOUT: Final[int] = _get_timeout(os.environ)

log = _log.log.getChild("run_cmd")

PARSE_RESULT = TypeVar("PARSE_RESULT")
T = TypeVar("T")


class CompletedProcess(BaseCompletedProcess):
    """``subprocess.CompletedProcess`` with stripped output and a helper
    that parses stdout only when the command succeeded."""

    @classmethod
    def from_raw(
        cls, input: BaseCompletedProcess, strip: bool = True
    ) -> CompletedProcess:
        """Copy *input*, optionally stripping whitespace from stdout/stderr."""

        def _clean(text: str | None) -> str | None:
            return text.strip() if strip and text else text

        return cls(
            args=input.args,
            returncode=input.returncode,
            stdout=_clean(input.stdout),
            stderr=_clean(input.stderr),
        )

    @overload
    def parse_success(
        self,
        parse: Callable[[str], PARSE_RESULT],
        default: None = None,
        error_msg: str | None = None,
    ) -> PARSE_RESULT | None: ...

    @overload
    def parse_success(
        self,
        parse: Callable[[str], PARSE_RESULT],
        default: T,
        error_msg: str | None = None,
    ) -> PARSE_RESULT | T: ...

    def parse_success(
        self,
        parse: Callable[[str], PARSE_RESULT],
        default: T | None = None,
        error_msg: str | None = None,
    ) -> PARSE_RESULT | T | None:
        """Apply *parse* to stdout on success, otherwise return *default*.

        When the command failed and *error_msg* is given, the failure is
        logged as a warning.
        """
        if not self.returncode:
            return parse(self.stdout)
        if error_msg:
            log.warning("%s %s", error_msg, self)
        return default


def no_git_env(env: Mapping[str, str]) -> dict[str, str]:
    """Return *env* without ``GIT_*`` variables (keeping a small allowlist).

    Adapted from pre-commit: inherited GIT_DIR / GIT_INDEX_FILE (and
    friends) break git invocations from hooks
    (https://github.com/pre-commit/pre-commit/issues/300).
    """
    allowlist = ("GIT_EXEC_PATH", "GIT_SSH", "GIT_SSH_COMMAND")
    cleaned: dict[str, str] = {}
    for key, value in env.items():
        if key.startswith("GIT_"):
            # trace every GIT_* variable we encounter, dropped or kept
            log.debug("%s: %s", key, value)
            if key not in allowlist:
                continue
        cleaned[key] = value
    return cleaned


def avoid_pip_isolation(env: Mapping[str, str]) -> dict[str, str]:
    """Undo pip build-isolation tweaks that can break Mercurial.

    Drops ``PYTHONNOUSERSITE`` and removes ``pip-build-env-`` entries
    from ``PYTHONPATH`` (see https://github.com/pypa/pip/issues/10635).
    """
    sanitized = {key: value for key, value in env.items() if key != "PYTHONNOUSERSITE"}
    pythonpath = sanitized.get("PYTHONPATH")
    if pythonpath is None:
        return sanitized

    kept_entries = [
        entry
        for entry in pythonpath.split(os.pathsep)
        if "pip-build-env-" not in entry
    ]
    sanitized["PYTHONPATH"] = os.pathsep.join(kept_entries)
    return sanitized


def ensure_stripped_str(str_or_bytes: str | bytes) -> str:
    """Decode bytes (utf-8 with surrogateescape) if needed, then strip."""
    if isinstance(str_or_bytes, bytes):
        str_or_bytes = str_or_bytes.decode("utf-8", "surrogateescape")
    return str_or_bytes.strip()


def run(
    cmd: _t.CMD_TYPE,
    cwd: _t.PathT,
    *,
    strip: bool = True,
    trace: bool = True,
    timeout: int | None = None,
    check: bool = False,
) -> CompletedProcess:
    """Run *cmd* in *cwd* with a scrubbed environment and captured output.

    Args:
        cmd: command string (shlex-split) or sequence of path-like args.
        cwd: working directory for the subprocess.
        strip: strip surrounding whitespace from stdout/stderr.
        trace: log stdout/stderr/return code at debug level.
        timeout: seconds before the subprocess is aborted; defaults to
            ``BROKEN_TIMEOUT`` (env-overridable).
        check: raise ``CalledProcessError`` for a non-zero exit code.
    """
    if isinstance(cmd, str):
        cmd = shlex.split(cmd)
    else:
        cmd = [os.fspath(x) for x in cmd]
    cmd_4_trace = " ".join(map(_unsafe_quote_for_display, cmd))
    log.debug("at %s\n    $ %s ", cwd, cmd_4_trace)
    if timeout is None:
        timeout = BROKEN_TIMEOUT
    res = subprocess.run(
        cmd,
        capture_output=True,
        cwd=os.fspath(cwd),
        # drop GIT_* leakage and pip's build isolation before running
        env=dict(
            avoid_pip_isolation(no_git_env(os.environ)),
            # os.environ,
            # try to disable i18n, but still allow UTF-8 encoded text.
            LC_ALL="C.UTF-8",
            LANGUAGE="",
            HGPLAIN="1",
        ),
        text=True,
        encoding="utf-8",
        timeout=timeout,
    )

    res = CompletedProcess.from_raw(res, strip=strip)
    if trace:
        if res.stdout:
            log.debug("out:\n%s", textwrap.indent(res.stdout, "    "))
        if res.stderr:
            log.debug("err:\n%s", textwrap.indent(res.stderr, "    "))
        if res.returncode:
            log.debug("ret: %s", res.returncode)
    if check:
        res.check_returncode()
    return res


def _unsafe_quote_for_display(item: _t.PathT) -> str:
    # give better results than shlex.join in our cases
    text = os.fspath(item)
    return text if all(c not in text for c in " {[:") else f'"{text}"'


def has_command(
    name: str, args: Sequence[str] = ("version",), warn: bool = True
) -> bool:
    """Return ``True`` when running ``name *args`` succeeds.

    A missing executable or a timeout is logged instead of raised.

    Args:
        name: executable to probe.
        args: arguments for the probe run (default ``("version",)`` —
            fixed from a mutable list default).
        warn: emit a ``RuntimeWarning`` when the command is unusable.
    """
    try:
        # keep the try body minimal: only the call that can raise
        p = run([name, *args], cwd=".")
    except OSError as e:
        log.warning("command %s missing: %s", name, e)
        res = False
    except subprocess.TimeoutExpired as e:
        log.warning("command %s timed out %s", name, e)
        res = False
    else:
        if p.returncode != 0:
            # lazy %-style args instead of an eagerly formatted f-string
            log.error("Command '%s' returned non-zero. This is stderr:", name)
            log.error(p.stderr)
        res = not p.returncode
    if not res and warn:
        warnings.warn("%r was not found" % name, category=RuntimeWarning)
    return res


class CommandNotFoundError(LookupError, FileNotFoundError):
    """Raised when a required scm executable is not available on PATH."""


def require_command(name: str) -> None:
    """Raise ``CommandNotFoundError`` unless *name* is a working command."""
    if has_command(name, warn=False):
        return
    raise CommandNotFoundError(name)

        </content>
    </file>
    <file>
        <name>_types.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_types.py</path>
        <content>
from __future__ import annotations

import os

from typing import TYPE_CHECKING
from typing import Callable
from typing import List
from typing import Sequence
from typing import Tuple
from typing import Union

if TYPE_CHECKING:
    import sys

    if sys.version_info >= (3, 10):
        from typing import TypeAlias
    else:
        from typing_extensions import TypeAlias

    from . import version

PathT: TypeAlias = Union["os.PathLike[str]", str]

CMD_TYPE: TypeAlias = Union[Sequence[PathT], str]

VERSION_SCHEME: TypeAlias = Union[str, Callable[["version.ScmVersion"], str]]
VERSION_SCHEMES: TypeAlias = Union[List[str], Tuple[str, ...], VERSION_SCHEME]
SCMVERSION: TypeAlias = "version.ScmVersion"

        </content>
    </file>
    <file>
        <name>_version_cls.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_version_cls.py</path>
        <content>
from __future__ import annotations

from typing import Type
from typing import Union
from typing import cast

try:
    from packaging.version import InvalidVersion
    from packaging.version import Version as Version
except ImportError:
    from setuptools.extern.packaging.version import InvalidVersion  # type: ignore
    from setuptools.extern.packaging.version import Version as Version  # type: ignore
from . import _log

log = _log.log.getChild("version_cls")


class NonNormalizedVersion(Version):
    """A non-normalizing version handler.

    Validates like the parent :class:`Version` but preserves the
    original text for ``str()`` — e.g. keeps a ``1.0.0-rc1`` tag from
    being normalized to ``1.0.0rc1``.  Only use this if you fully trust
    the version tags.
    """

    def __init__(self, version: str) -> None:
        # let the parent validate and parse, then remember the raw text
        super().__init__(version)
        self._raw_version = version

    def __str__(self) -> str:
        # the parent would return the normalized form; we return the raw one
        return self._raw_version

    def __repr__(self) -> str:
        return f"<NonNormalizedVersion({self._raw_version!r})>"


def _version_as_tuple(version_str: str) -> tuple[int | str, ...]:
    """Return *version_str* as a tuple of release fields.

    Appends ``dev<N>`` and the local segment when present.  An
    unparsable version is logged and returned as a 1-tuple of the raw
    string.
    """
    try:
        parsed_version = Version(version_str)
    except InvalidVersion as e:
        # bug fix: the placeholders were swapped — the message now reads
        # "failed to parse version <version>: <error>"
        log.error("failed to parse version %s: %s", version_str, e)
        return (version_str,)
    else:
        version_fields: tuple[int | str, ...] = parsed_version.release
        if parsed_version.dev is not None:
            version_fields += (f"dev{parsed_version.dev}",)
        if parsed_version.local is not None:
            version_fields += (parsed_version.local,)
        return version_fields


_VersionT = Union[Version, NonNormalizedVersion]


def import_name(name: str) -> object:
    """Import the dotted *name* and return its final attribute."""
    import importlib

    module_path, _, attribute = name.rpartition(".")
    module = importlib.import_module(module_path)
    return getattr(module, attribute)


def _validate_version_cls(
    version_cls: type[_VersionT] | str | None, normalize: bool
) -> type[_VersionT]:
    """Resolve the version class to use.

    Args:
        version_cls: a class, a dotted import path, or ``None`` for the
            default ``Version``.
        normalize: when ``False``, force ``NonNormalizedVersion`` and
            reject any custom class.

    Raises:
        ValueError: for the invalid combination or an unimportable path.
    """
    if not normalize:
        if version_cls is not None:
            raise ValueError(
                "Providing a custom `version_cls` is not permitted when "
                "`normalize=False`"
            )
        return NonNormalizedVersion

    # Use `version_cls` if provided, default to packaging or pkg_resources
    if version_cls is None:
        return Version
    if isinstance(version_cls, str):
        try:
            return cast(Type[_VersionT], import_name(version_cls))
        # bug fix: a bare ``except:`` also swallowed SystemExit and
        # KeyboardInterrupt; Exception covers every import failure mode
        except Exception:
            raise ValueError(
                f"Unable to import version_cls='{version_cls}'"
            ) from None
    return version_cls

        </content>
    </file>
    <file>
        <name>__init__.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\__init__.py</path>
        <content>
"""
:copyright: 2010-2023 by Ronny Pfannschmidt
:license: MIT
"""

from __future__ import annotations

from ._config import DEFAULT_LOCAL_SCHEME
from ._config import DEFAULT_VERSION_SCHEME
from ._config import Configuration
from ._get_version_impl import _get_version
from ._get_version_impl import get_version
from ._integration.dump_version import dump_version  # soft deprecated
from ._version_cls import NonNormalizedVersion
from ._version_cls import Version
from .version import ScmVersion

# Public API
__all__ = [
    "DEFAULT_LOCAL_SCHEME",
    "DEFAULT_VERSION_SCHEME",
    "Configuration",
    "NonNormalizedVersion",
    "ScmVersion",
    "Version",
    "_get_version",
    "dump_version",
    # soft deprecated imports, left for backward compatibility
    "get_version",
]

        </content>
    </file>
    <file>
        <name>__main__.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\__main__.py</path>
        <content>
from __future__ import annotations

from ._cli import main

if __name__ == "__main__":
    raise SystemExit(main())

        </content>
    </file>
            </directory>
                <directory name="_file_finders">
    <file>
        <name>git.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_file_finders\git.py</path>
        <content>
from __future__ import annotations

import logging
import os
import subprocess
import tarfile

from typing import IO

from .. import _types as _t
from .._run_cmd import run as _run
from ..integration import data_from_mime
from . import is_toplevel_acceptable
from . import scm_find_files
from .pathtools import norm_real

log = logging.getLogger(__name__)


def _git_toplevel(path: str) -> str | None:
    """Return the normalized absolute git toplevel directory for *path*.

    ``None`` when *path* is not inside a usable git checkout (no commit
    yet, git missing, or ``rev-parse`` failing).
    """
    try:
        cwd = os.path.abspath(path or ".")
        # first verify there is a HEAD commit at all
        res = _run(["git", "rev-parse", "HEAD"], cwd=cwd)
        if res.returncode:
            # BAIL if there is no commit
            log.error("listing git files failed - pretending there aren't any")
            return None
        res = _run(
            ["git", "rev-parse", "--show-prefix"],
            cwd=cwd,
        )
        if res.returncode:
            return None
        out = res.stdout[:-1]  # remove the trailing pathsep
        if not out:
            out = cwd
        else:
            # Here, ``out`` is a relative path to root of git.
            # ``cwd`` is absolute path to current working directory.
            # the below method removes the length of ``out`` from
            # ``cwd``, which gives the git toplevel
            assert cwd.replace("\\", "/").endswith(out), f"cwd={cwd!r}\nout={out!r}"
            # In windows cwd contains ``\`` which should be replaced by ``/``
            # for this assertion to work. Length of string isn't changed by replace
            # ``\\`` is just an escape for ``\``
            out = cwd[: -len(out)]
        log.debug("find files toplevel %s", out)
        return norm_real(out)
    except subprocess.CalledProcessError:
        # git returned error, we are not in a git repo
        return None
    except OSError:
        # git command not found, probably
        return None


def _git_interpret_archive(fd: IO[bytes], toplevel: str) -> tuple[set[str], set[str]]:
    with tarfile.open(fileobj=fd, mode="r|*") as tf:
        git_files = set()
        git_dirs = {toplevel}
        for member in tf.getmembers():
            name = os.path.normcase(member.name).replace("/", os.path.sep)
            if member.type == tarfile.DIRTYPE:
                git_dirs.add(name)
            else:
                git_files.add(name)
        return git_files, git_dirs


def _git_ls_files_and_dirs(toplevel: str) -> tuple[set[str], set[str]]:
    """Return the git-tracked ``(files, dirs)`` below *toplevel*.

    Streams ``git archive`` instead of ``git ls-files`` so the
    ``export-ignore`` git attribute is honored.  On any failure an empty
    pair is returned.
    """
    # use git archive instead of git ls-file to honor
    # export-ignore git attribute

    cmd = ["git", "archive", "--prefix", toplevel + os.path.sep, "HEAD"]
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, cwd=toplevel, stderr=subprocess.DEVNULL
    )
    assert proc.stdout is not None
    try:
        try:
            return _git_interpret_archive(proc.stdout, toplevel)
        finally:
            # ensure we avoid resource warnings by cleaning up the process
            # NOTE(review): this finally also runs on the success path,
            # terminating git after the archive stream has been consumed
            proc.stdout.close()
            proc.terminate()
    except Exception:
        # only reached when interpreting the archive (or the cleanup) fails;
        # a non-zero git exit is reported, and an empty result returned
        if proc.wait() != 0:
            log.error("listing git files failed - pretending there aren't any")
        return set(), set()


def git_find_files(path: _t.PathT = "") -> list[str]:
    """List git-tracked files under *path*; empty when not a usable repo."""
    toplevel = _git_toplevel(os.fspath(path))
    if not is_toplevel_acceptable(toplevel):
        return []
    fullpath = norm_real(path)
    if not fullpath.startswith(toplevel):
        log.warning("toplevel mismatch computed %s vs resolved %s ", toplevel, fullpath)
    tracked_files, tracked_dirs = _git_ls_files_and_dirs(toplevel)
    return scm_find_files(path, tracked_files, tracked_dirs)


def git_archive_find_files(path: _t.PathT = "") -> list[str]:
    """List all files when *path* is an unpacked ``git archive`` export.

    Assumes ignored files were already excluded at archive creation
    time; requires a substituted ``.git_archival.txt``.
    """
    archival = os.path.join(path, ".git_archival.txt")
    if not os.path.exists(archival):
        return []

    data = data_from_mime(archival)
    if "$Format" in data.get("node", ""):
        # placeholder was never expanded -> not a reliable archive
        return []

    log.warning("git archive detected - fallback to listing all files")
    return scm_find_files(path, set(), set(), force_all_files=True)

        </content>
    </file>
    <file>
        <name>hg.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_file_finders\hg.py</path>
        <content>
from __future__ import annotations

import logging
import os
import subprocess

from .. import _types as _t
from .._file_finders import is_toplevel_acceptable
from .._file_finders import scm_find_files
from .._run_cmd import run as _run
from ..integration import data_from_mime
from .pathtools import norm_real

log = logging.getLogger(__name__)


def _hg_toplevel(path: str) -> str | None:
    """Return the mercurial root for *path*, or ``None`` when unavailable."""
    try:
        completed = _run(
            ["hg", "root"],
            cwd=(path or "."),
            check=True,
        )
        return completed.parse_success(norm_real)
    except subprocess.CalledProcessError:
        # hg returned an error: not inside a mercurial repo
        return None
    except OSError:
        # the hg executable itself is probably missing
        return None


def _hg_ls_files_and_dirs(toplevel: str) -> tuple[set[str], set[str]]:
    """Collect mercurial-tracked files and their ancestor dirs below *toplevel*."""
    res = _run(["hg", "files"], cwd=toplevel)
    if res.returncode:
        return set(), set()
    hg_files: set[str] = set()
    hg_dirs = {toplevel}
    for line in res.stdout.splitlines():
        normalized = os.path.normcase(line).replace("/", os.path.sep)
        fullname = os.path.join(toplevel, normalized)
        hg_files.add(fullname)
        parent = os.path.dirname(fullname)
        # walk up, recording every ancestor until one is already known
        while len(parent) > len(toplevel) and parent not in hg_dirs:
            hg_dirs.add(parent)
            parent = os.path.dirname(parent)
    return hg_files, hg_dirs


def hg_find_files(path: str = "") -> list[str]:
    """List mercurial-tracked files under *path*; empty when not a repo."""
    toplevel = _hg_toplevel(path)
    if not is_toplevel_acceptable(toplevel):
        return []
    assert toplevel is not None
    tracked_files, tracked_dirs = _hg_ls_files_and_dirs(toplevel)
    return scm_find_files(path, tracked_files, tracked_dirs)


def hg_archive_find_files(path: _t.PathT = "") -> list[str]:
    """List all files when *path* is an unpacked mercurial archive.

    Assumes ignored files were already excluded at archive creation time.
    """
    archival = os.path.join(path, ".hg_archival.txt")
    if not os.path.exists(archival):
        return []

    data = data_from_mime(archival)
    if "node" not in data:
        # metadata incomplete -> not a trustworthy archive
        return []

    log.warning("hg archive detected - fallback to listing all files")
    return scm_find_files(path, set(), set(), force_all_files=True)

        </content>
    </file>
    <file>
        <name>pathtools.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_file_finders\pathtools.py</path>
        <content>
from __future__ import annotations

import os

from setuptools_scm import _types as _t


def norm_real(path: _t.PathT) -> str:
    """Resolve symlinks and normalize case for *path*."""
    resolved = os.path.realpath(path)
    return os.path.normcase(resolved)

        </content>
    </file>
    <file>
        <name>__init__.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_file_finders\__init__.py</path>
        <content>
from __future__ import annotations

import itertools
import os

from typing import TYPE_CHECKING
from typing import Callable

from .. import _log
from .. import _types as _t
from .._entrypoints import iter_entry_points
from .pathtools import norm_real

if TYPE_CHECKING:
    import sys

    if sys.version_info >= (3, 10):
        from typing import TypeGuard
    else:
        from typing_extensions import TypeGuard


log = _log.log.getChild("file_finder")


def scm_find_files(
    path: _t.PathT,
    scm_files: set[str],
    scm_dirs: set[str],
    force_all_files: bool = False,
) -> list[str]:
    """ setuptools compatible file finder that follows symlinks

    - path: the root directory from which to search
    - scm_files: set of scm controlled files and symlinks
      (including symlinks to directories)
    - scm_dirs: set of scm controlled directories
      (including directories containing no scm controlled files)
    - force_all_files: ignore ``scm_files`` and ``scm_dirs`` and list everything.

    scm_files and scm_dirs must be absolute with symlinks resolved (realpath),
    with normalized case (normcase)

    Spec here: https://setuptools.pypa.io/en/latest/userguide/extension.html#\
        adding-support-for-revision-control-systems
    """
    realpath = norm_real(path)
    seen: set[str] = set()
    res: list[str] = []
    for dirpath, dirnames, filenames in os.walk(realpath, followlinks=True):
        # dirpath with symlinks resolved
        realdirpath = norm_real(dirpath)

        # realdirpath is bound as a default argument to avoid the
        # late-binding-closure pitfall inside the walk loop
        def _link_not_in_scm(n: str, realdirpath: str = realdirpath) -> bool:
            fn = os.path.join(realdirpath, os.path.normcase(n))
            return os.path.islink(fn) and fn not in scm_files

        if not force_all_files and realdirpath not in scm_dirs:
            # directory not in scm, don't walk its content
            dirnames[:] = []
            continue
        if os.path.islink(dirpath) and not os.path.relpath(
            realdirpath, realpath
        ).startswith(os.pardir):
            # a symlink to a directory not outside path:
            # we keep it in the result and don't walk its content
            res.append(os.path.join(path, os.path.relpath(dirpath, path)))
            dirnames[:] = []
            continue
        if realdirpath in seen:
            # symlink loop protection
            dirnames[:] = []
            continue
        # prune symlinked subdirectories that are not scm controlled
        dirnames[:] = [
            dn for dn in dirnames if force_all_files or not _link_not_in_scm(dn)
        ]
        for filename in filenames:
            if not force_all_files and _link_not_in_scm(filename):
                continue
            # dirpath + filename with symlinks preserved
            fullfilename = os.path.join(dirpath, filename)
            is_tracked = norm_real(fullfilename) in scm_files
            if force_all_files or is_tracked:
                # report paths relative to the requested root, not realpath
                res.append(os.path.join(path, os.path.relpath(fullfilename, realpath)))
        seen.add(realdirpath)
    return res


def is_toplevel_acceptable(toplevel: str | None) -> TypeGuard[str]:
    """Return True when *toplevel* is a usable VCS root.

    A root is rejected when it is ``None`` or listed in the
    ``SETUPTOOLS_SCM_IGNORE_VCS_ROOTS`` environment variable, an
    ``os.pathsep``-separated list of paths.
    """
    if toplevel is None:
        return False

    ignored: list[str] = os.environ.get("SETUPTOOLS_SCM_IGNORE_VCS_ROOTS", "").split(
        os.pathsep
    )
    # NOTE(review): only the ignore list is normcased; *toplevel* is compared
    # as-is — presumably callers already pass a normcased path, confirm.
    ignored = [os.path.normcase(p) for p in ignored]

    log.debug("toplevel: %r\n    ignored %s", toplevel, ignored)

    return toplevel not in ignored


def find_files(path: _t.PathT = "") -> list[str]:
    """Return the file list from the first entry-point command that yields one.

    Primary ``files_command`` entry points are tried before the fallback
    group; an empty result moves on to the next candidate.
    """
    candidates = itertools.chain(
        iter_entry_points("setuptools_scm.files_command"),
        iter_entry_points("setuptools_scm.files_command_fallback"),
    )
    for entry_point in candidates:
        command: Callable[[_t.PathT], list[str]] = entry_point.load()
        found: list[str] = command(path)
        if found:
            return found
    return []

        </content>
    </file>
                </directory>
                <directory name="_integration">
    <file>
        <name>dump_version.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_integration\dump_version.py</path>
        <content>
from __future__ import annotations

import warnings

from pathlib import Path

from .. import _types as _t
from .._log import log as parent_log
from .._version_cls import _version_as_tuple
from ..version import ScmVersion

log = parent_log.getChild("dump_version")

# Default version-file templates keyed by target file suffix.
# Each template is rendered with str.format using {version} and (for .py)
# {version_tuple}; see write_version_to_path.
TEMPLATES = {
    ".py": """\
# file generated by setuptools_scm
# don't change, don't track in version control
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple, Union
    VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
    VERSION_TUPLE = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE

__version__ = version = {version!r}
__version_tuple__ = version_tuple = {version_tuple!r}
""",
    ".txt": "{version}",
}


def dump_version(
    root: _t.PathT,
    version: str,
    write_to: _t.PathT,
    template: str | None = None,
    scm_version: ScmVersion | None = None,
) -> None:
    """Write *version* into the version file *write_to*.

    Args:
        root: project root against which a relative *write_to* is resolved.
        version: the rendered version string.
        write_to: target file; absolute paths are deprecated.
        template: optional format string; suffix-based default otherwise.
        scm_version: optional ScmVersion forwarded to the template.

    Raises:
        ValueError: when *write_to* is absolute but escapes *root*, or when
            no template exists for the target suffix.
    """
    assert isinstance(version, str)
    root = Path(root)
    write_to = Path(write_to)
    if write_to.is_absolute():
        # relative_to raises ValueError when write_to escapes root, rejecting
        # absolute paths outside the project before the deprecation warning.
        write_to.relative_to(root)
        warnings.warn(
            f"{write_to=!s} is an absolute path,"
            " please switch to using a relative version file",
            DeprecationWarning,
        )
        target = write_to
    else:
        # root is already a Path; the previous code re-wrapped it redundantly.
        target = root.joinpath(write_to)
    write_version_to_path(
        target, template=template, version=version, scm_version=scm_version
    )


def _validate_template(target: Path, template: str | None) -> str:
    if template == "":
        warnings.warn(f"{template=} looks like a error, using default instead")
        template = None
    if template is None:
        template = TEMPLATES.get(target.suffix)

    if template is None:
        raise ValueError(
            f"bad file format: {target.suffix!r} (of {target})\n"
            "only *.txt and *.py have a default template"
        )
    else:
        return template


def write_version_to_path(
    target: Path, template: str | None, version: str, scm_version: ScmVersion | None
) -> None:
    """Render the version template and write it to *target* as UTF-8.

    The template receives {version} and {version_tuple}; {scm_version} is
    supplied only when *scm_version* is not None.
    """
    final_template = _validate_template(target, template)
    log.debug("dump %s into %s", version, target)
    version_tuple = _version_as_tuple(version)
    if scm_version is not None:
        content = final_template.format(
            version=version,
            version_tuple=version_tuple,
            scm_version=scm_version,
        )
    else:
        content = final_template.format(version=version, version_tuple=version_tuple)

    target.write_text(content, encoding="utf-8")

        </content>
    </file>
    <file>
        <name>pyproject_reading.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_integration\pyproject_reading.py</path>
        <content>
from __future__ import annotations

import warnings

from pathlib import Path
from typing import NamedTuple

from .. import _log
from .setuptools import read_dist_name_from_setup_cfg
from .toml import TOML_RESULT
from .toml import read_toml_content

log = _log.log.getChild("pyproject_reading")

_ROOT = "root"


class PyProjectData(NamedTuple):
    """Parsed pieces of a pyproject.toml used by setuptools_scm."""

    # Location of the pyproject.toml that was read.
    path: Path
    # Tool section name, e.g. "setuptools_scm".
    tool_name: str
    # The [project] table (PEP 621 metadata), possibly empty.
    project: TOML_RESULT
    # The [tool.<tool_name>] table, possibly empty.
    section: TOML_RESULT

    @property
    def project_name(self) -> str | None:
        """The PEP 621 project name, if declared."""
        return self.project.get("name")


def read_pyproject(
    path: Path = Path("pyproject.toml"),
    tool_name: str = "setuptools_scm",
    require_section: bool = True,
) -> PyProjectData:
    """Parse *path* and extract the ``[tool.<tool_name>]`` table.

    When the section is absent: raises LookupError if *require_section*,
    otherwise logs a warning and substitutes an empty section. A missing
    file is likewise only tolerated when *require_section* is False.
    """
    fallback = None if require_section else {}
    defn = read_toml_content(path, fallback)
    try:
        section = defn.get("tool", {})[tool_name]
    except LookupError as lookup_error:
        message = f"{path} does not contain a tool.{tool_name} section"
        if require_section:
            raise LookupError(message) from lookup_error
        log.warning("toml section missing %r", message, exc_info=True)
        section = {}

    project = defn.get("project", {})
    return PyProjectData(path, tool_name, project, section)


def get_args_for_pyproject(
    pyproject: PyProjectData,
    dist_name: str | None,
    kwargs: TOML_RESULT,
) -> TOML_RESULT:
    """drops problematic details and figures the distribution name

    Merges the tool section with CLI-style overrides in *kwargs* (kwargs
    win on key collisions). dist_name is resolved in order from: the
    explicit argument, the section's "dist_name" key, the PEP 621
    [project].name, then setup.cfg metadata.
    """
    section = pyproject.section.copy()
    kwargs = kwargs.copy()
    if "relative_to" in section:
        relative = section.pop("relative_to")
        warnings.warn(
            f"{pyproject.path}: at [tool.{pyproject.tool_name}]\n"
            f"ignoring value relative_to={relative!r}"
            " as its always relative to the config file"
        )
    if "dist_name" in section:
        if dist_name is None:
            dist_name = section.pop("dist_name")
        else:
            # An explicitly passed dist_name must agree with the configured one.
            assert dist_name == section["dist_name"]
            section.pop("dist_name")
    if dist_name is None:
        # minimal pep 621 support for figuring the pretend keys
        dist_name = pyproject.project_name
    if dist_name is None:
        dist_name = read_dist_name_from_setup_cfg()
    if _ROOT in kwargs:
        if kwargs[_ROOT] is None:
            # A None CLI root means "unset": drop it so any section value wins.
            kwargs.pop(_ROOT, None)
        elif _ROOT in section:
            if section[_ROOT] != kwargs[_ROOT]:
                warnings.warn(
                    f"root {section[_ROOT]} is overridden"
                    f" by the cli arg {kwargs[_ROOT]}"
                )
            section.pop(_ROOT, None)
    return {"dist_name": dist_name, **section, **kwargs}

        </content>
    </file>
    <file>
        <name>setuptools.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_integration\setuptools.py</path>
        <content>
from __future__ import annotations

import logging
import os
import warnings

from typing import Any
from typing import Callable

import setuptools

from .. import _config

log = logging.getLogger(__name__)


def read_dist_name_from_setup_cfg(
    input: str | os.PathLike[str] = "setup.cfg",
) -> str | None:
    # minimal effort to read dist_name off setup.cfg metadata
    import configparser

    parser = configparser.ConfigParser()
    parser.read([input], encoding="utf-8")
    dist_name = parser.get("metadata", "name", fallback=None)
    return dist_name


def _warn_on_old_setuptools(_version: str = setuptools.__version__) -> None:
    """Warn when the installed setuptools is too old for setuptools_scm 8.

    The default for *_version* is captured from the installed setuptools at
    import time; callers may pass another version string explicitly.
    """
    # Only the major component is compared; setuptools>=61 is required.
    if int(_version.split(".")[0]) < 61:
        warnings.warn(
            RuntimeWarning(
                f"""
ERROR: setuptools=={_version} is used in combination with setuptools_scm>=8.x

Your build configuration is incomplete and previously worked by accident!
setuptools_scm requires setuptools>=61

Suggested workaround if applicable:
 - migrating from the deprecated setup_requires mechanism to pep517/518
   and using a pyproject.toml to declare build dependencies
   which are reliably pre-installed before running the build tools
"""
            )
        )


def _assign_version(
    dist: setuptools.Distribution, config: _config.Configuration
) -> None:
    """Compute the version for *config* and assign it to *dist* metadata."""
    from .._get_version_impl import _get_version
    from .._get_version_impl import _version_missing

    # todo: build time plugin
    maybe_version = _get_version(config, force_write_version_files=True)

    if maybe_version is None:
        # NOTE(review): _version_missing presumably reports/raises on failure —
        # confirm in _get_version_impl; no version is assigned in this branch.
        _version_missing(config)
    else:
        # Callers only invoke this when no version has been set yet.
        assert dist.metadata.version is None
        dist.metadata.version = maybe_version


# Emit the old-setuptools compatibility warning once, at module import time.
_warn_on_old_setuptools()


def _log_hookstart(hook: str, dist: setuptools.Distribution) -> None:
    """Debug-log the distribution metadata at the start of a setuptools hook."""
    log.debug("%s %r", hook, vars(dist.metadata))


def version_keyword(
    dist: setuptools.Distribution,
    keyword: str,
    value: bool | dict[str, Any] | Callable[[], dict[str, Any]],
) -> None:
    """setup() keyword handler that derives and assigns the SCM version.

    *value* may be True (no overrides), a dict of configuration overrides,
    or a zero-argument callable returning such a dict. Does nothing beyond
    a warning when the version is already set.

    NOTE(review): presumably registered for the ``use_scm_version`` keyword
    via entry points — confirm in the package metadata.
    """
    overrides: dict[str, Any]
    if value is True:
        overrides = {}
    elif callable(value):
        overrides = value()
    else:
        assert isinstance(value, dict), "version_keyword expects a dict or True"
        overrides = value

    # dist_name comes from dist metadata / setup.cfg, never from overrides.
    assert (
        "dist_name" not in overrides
    ), "dist_name may not be specified in the setup keyword "
    dist_name: str | None = dist.metadata.name
    _log_hookstart("version_keyword", dist)

    if dist.metadata.version is not None:
        warnings.warn(f"version of {dist_name} already set")
        return

    if dist_name is None:
        dist_name = read_dist_name_from_setup_cfg()

    config = _config.Configuration.from_file(
        dist_name=dist_name,
        _require_section=False,
        **overrides,
    )
    _assign_version(dist, config)


def infer_version(dist: setuptools.Distribution) -> None:
    """Infer the SCM version from pyproject.toml configuration.

    Bails out silently when a version is already set, when no
    pyproject.toml exists, or for setuptools_scm's own distribution.
    """
    _log_hookstart("infer_version", dist)
    log.debug("dist %s %s", id(dist), id(dist.metadata))
    if dist.metadata.version is not None:
        return  # metadata already added by hook
    dist_name = dist.metadata.name
    if dist_name is None:
        dist_name = read_dist_name_from_setup_cfg()
    if not os.path.isfile("pyproject.toml"):
        return
    if dist_name == "setuptools_scm":
        return
    try:
        config = _config.Configuration.from_file(dist_name=dist_name)
    except LookupError as e:
        # Missing tool section is not an error for this opportunistic hook.
        log.info(e, exc_info=True)
    else:
        _assign_version(dist, config)

        </content>
    </file>
    <file>
        <name>toml.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_integration\toml.py</path>
        <content>
from __future__ import annotations

import sys

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Callable
from typing import Dict
from typing import TypedDict
from typing import cast

if sys.version_info >= (3, 11):
    from tomllib import loads as load_toml
else:
    from tomli import loads as load_toml

if TYPE_CHECKING:
    if sys.version_info >= (3, 10):
        from typing import TypeAlias
    else:
        from typing_extensions import TypeAlias

from .. import _log

log = _log.log.getChild("toml")

TOML_RESULT: TypeAlias = Dict[str, Any]
TOML_LOADER: TypeAlias = Callable[[str], TOML_RESULT]


def read_toml_content(path: Path, default: TOML_RESULT | None = None) -> TOML_RESULT:
    try:
        data = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        if default is None:
            raise
        else:
            log.debug("%s missing, presuming default %r", path, default)
            return default
    else:
        return load_toml(data)


class _CheatTomlData(TypedDict):
    """Shape of the wrapper built by load_toml_or_inline_map's cheat key."""

    cheat: dict[str, Any]


def load_toml_or_inline_map(data: str | None) -> dict[str, Any]:
    """
    load toml data - with a special hack if only a inline map is given
    """
    if not data:
        return {}
    elif data[0] == "{":
        data = "cheat=" + data
        loaded: _CheatTomlData = cast(_CheatTomlData, load_toml(data))
        return loaded["cheat"]
    return load_toml(data)

        </content>
    </file>
    <file>
        <name>__init__.py</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\setuptools_scm\_integration\__init__.py</path>
        <content>

        </content>
    </file>
                </directory>
    <directory name=".pytest_cache">
    <file>
        <name>.gitignore</name>
        <path>.pytest_cache\.gitignore</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>CACHEDIR.TAG</name>
        <path>.pytest_cache\CACHEDIR.TAG</path>
        <content>Full content not provided</content>
    </file>
    </directory>
        <directory name="v">
        </directory>
            <directory name="cache">
    <file>
        <name>lastfailed</name>
        <path>.pytest_cache\v\cache\lastfailed</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>nodeids</name>
        <path>.pytest_cache\v\cache\nodeids</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>stepwise</name>
        <path>.pytest_cache\v\cache\stepwise</path>
        <content>Full content not provided</content>
    </file>
            </directory>
    <directory name="academic_claim_analyzer">
    <file>
        <name>batch_processor.py</name>
        <path>academic_claim_analyzer\batch_processor.py</path>
        <content>
# academic_claim_analyzer/batch_processor.py

import asyncio
import json
import os
from datetime import datetime
from typing import List, Dict, Any
from .main import analyze_claim
from .models import ClaimAnalysis, RankedPaper

async def process_claims(claims: List[str], **kwargs) -> Dict[str, ClaimAnalysis]:
    """Analyze each claim sequentially; claims that raise are reported and skipped."""
    analyses: Dict[str, ClaimAnalysis] = {}
    for claim_text in claims:
        try:
            analyses[claim_text] = await analyze_claim(claim_text, **kwargs)
        except Exception as exc:
            print(f"Error analyzing claim: {claim_text}")
            print(f"Error details: {str(exc)}")
    return analyses

def batch_analyze_claims(claims: List[str], output_dir: str, num_top_papers: int = 5, **kwargs) -> Dict[str, List[Dict[str, Any]]]:
    """Analyze claims synchronously, persist results to JSON, and return them.

    Args:
        claims: claim sentences to analyze.
        output_dir: directory for the timestamped results file (created if missing).
        num_top_papers: how many ranked papers to keep per claim.
        **kwargs: forwarded to analyze_claim (e.g. num_queries, papers_per_query).

    Returns:
        Mapping of claim -> list of serialized top-paper dicts.
    """
    results = asyncio.run(process_claims(claims, **kwargs))
    
    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)
    
    # Generate a timestamp for the filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"claim_analysis_results_{timestamp}.json"
    filepath = os.path.join(output_dir, filename)
    
    # Process and serialize the results
    processed_results = {}
    for claim, analysis in results.items():
        top_papers = analysis.get_top_papers(num_top_papers)
        processed_results[claim] = [
            {
                "title": paper.title,
                "authors": paper.authors,
                "year": paper.year,
                "doi": paper.doi,
                "relevance_score": paper.relevance_score,
                "relevant_quotes": paper.relevant_quotes,
                "analysis": paper.analysis,
                "bibtex": paper.bibtex
            }
            for paper in top_papers
        ]
    
    # Write the results to a JSON file
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(processed_results, f, ensure_ascii=False, indent=2)
    
    print(f"Results stored in: {filepath}")
    
    return processed_results

def print_results_summary(results: Dict[str, List[Dict[str, Any]]]):
    """Print a short per-claim overview of batch results."""
    print("\nResults Summary:")
    for claim_text, paper_list in results.items():
        print(f"\nClaim: {claim_text}")
        print(f"Number of top papers: {len(paper_list)}")

def print_detailed_result(claim: str, papers: List[Dict[str, Any]]):
    """Print the full record of each top paper for a single claim."""
    print("\nDetailed Result for Claim:")
    print(f"Claim: {claim}")
    print(f"\nTop ranked papers:")
    for entry in papers:
        print(f"\nTitle: {entry['title']}")
        print(f"Authors: {', '.join(entry['authors'])}")
        print(f"Year: {entry['year']}")
        print(f"DOI: {entry['doi']}")
        print(f"Relevance Score: {entry['relevance_score']}")
        print(f"Analysis: {entry['analysis']}")
        print("Relevant Quotes:")
        for excerpt in entry['relevant_quotes']:
            print(f"- {excerpt}")
        print("BibTeX:")
        print(entry['bibtex'])

def print_schema(results: Dict[str, List[Dict[str, Any]]]):
    """Print the static JSON schema describing result objects.

    The schema is a fixed description; *results* is accepted for interface
    compatibility but not inspected. (The previous implementation pulled an
    unused sample paper from it, which crashed on empty results.)
    """
    schema = {
        "claim": "string",
        "papers": [
            {
                "title": "string",
                "authors": ["string"],
                "year": "int",
                "doi": "string",
                "relevance_score": "float",
                "relevant_quotes": ["string"],
                "analysis": "string",
                "bibtex": "string"
            }
        ]
    }
    print("\nSchema of result object:")
    print(json.dumps(schema, indent=2))

def main():
    """Demo entry point: batch-analyze sample health claims and print results."""
    claims = [
        "Coffee consumption is associated with reduced risk of type 2 diabetes.",
        "Regular exercise can lower the risk of cardiovascular disease.",
        "Mindfulness meditation may help reduce symptoms of anxiety and depression.",
    ]

    # NOTE(review): hard-coded, user-specific Windows output path — parameterize
    # (CLI arg or env var) before anyone else runs this.
    results = batch_analyze_claims(claims, output_dir=r"C:\Users\bnsoh2\Desktop\test", num_queries=2, papers_per_query=3, num_top_papers=2)

    print_results_summary(results)
    first_claim = next(iter(results))
    print_detailed_result(first_claim, results[first_claim])
    print_schema(results)

if __name__ == "__main__":
    main()
        </content>
    </file>
    <file>
        <name>main.py</name>
        <path>academic_claim_analyzer\main.py</path>
        <content>
# academic_claim_analyzer/main.py

import asyncio
import logging
from typing import List
from .query_formulator import formulate_queries
from .paper_ranker import rank_papers
from .search import OpenAlexSearch, ScopusSearch, CORESearch, BaseSearch
from .models import ClaimAnalysis, Paper

logger = logging.getLogger(__name__)

async def analyze_claim(
    claim: str,
    num_queries: int = 2,
    papers_per_query: int = 2,
    num_papers_to_return: int = 2
) -> ClaimAnalysis:
    """Run the query -> search -> rank pipeline for one claim.

    Never raises: any pipeline failure is logged and recorded under
    analysis.metadata["error"], and the (possibly partial) ClaimAnalysis
    is returned regardless.
    """
    analysis = ClaimAnalysis(
        claim=claim,
        parameters={
            "num_queries": num_queries,
            "papers_per_query": papers_per_query,
            "num_papers_to_return": num_papers_to_return
        }
    )
    
    try:
        await _perform_analysis(analysis)
    except Exception as e:
        logger.error(f"Error during claim analysis: {str(e)}")
        analysis.metadata["error"] = str(e)
    
    return analysis

async def _perform_analysis(analysis: ClaimAnalysis) -> None:
    """Run the three pipeline stages in order; each mutates *analysis* in place."""
    await _formulate_queries(analysis)
    await _perform_searches(analysis)
    await _rank_papers(analysis)

async def _formulate_queries(analysis: ClaimAnalysis) -> None:
    """Generate num_queries search queries per backend and record them."""
    openalex_queries = await formulate_queries(analysis.claim, analysis.parameters["num_queries"], "openalex")
    scopus_queries = await formulate_queries(analysis.claim, analysis.parameters["num_queries"], "scopus")
    
    for query in openalex_queries:
        analysis.add_query(query, "openalex")
    for query in scopus_queries:
        analysis.add_query(query, "scopus")

async def _perform_searches(analysis: ClaimAnalysis) -> None:
    """Fan out all backend searches concurrently and collect their papers.

    OpenAlex and Scopus are searched with the formulated queries; CORE is
    searched once with the raw claim text.
    """
    search_tasks = []
    
    # NOTE(review): hard-coded personal email (OpenAlex contact) — move to
    # configuration before wider use.
    openalex_search = OpenAlexSearch("bnsoh2@huskers.unl.edu")
    openalex_queries = [q for q in analysis.queries if q.source == "openalex"]
    for query in openalex_queries:
        search_tasks.append(_search_and_add_results(
            openalex_search, query.query, analysis.parameters["papers_per_query"], analysis
        ))
    
    scopus_search = ScopusSearch()
    scopus_queries = [q for q in analysis.queries if q.source == "scopus"]
    for query in scopus_queries:
        search_tasks.append(_search_and_add_results(
            scopus_search, query.query, analysis.parameters["papers_per_query"], analysis
        ))
    
    core_search = CORESearch()
    search_tasks.append(_search_and_add_results(
        core_search, analysis.claim, analysis.parameters["papers_per_query"], analysis
    ))

    await asyncio.gather(*search_tasks)

async def _search_and_add_results(search_module: BaseSearch, query: str, limit: int, analysis: ClaimAnalysis) -> None:
    """Run one search and funnel every returned paper into *analysis*.

    Any failure is logged and swallowed so one backend cannot sink the batch.
    """
    try:
        for found_paper in await search_module.search(query, limit):
            analysis.add_search_result(found_paper)
    except Exception as exc:
        logger.error(f"Error during search with {search_module.__class__.__name__}: {str(exc)}")

async def _rank_papers(analysis: ClaimAnalysis) -> None:
    """Rank the collected search results and record them on *analysis*."""
    for ranked in await rank_papers(analysis.search_results, analysis.claim):
        analysis.add_ranked_paper(ranked)

async def main():
    """Demo entry point: analyze one claim and print the full pipeline output."""
    claim = "Coffee consumption is associated with reduced risk of type 2 diabetes."
    analysis_result = await analyze_claim(claim)
    
    print(f"Claim: {analysis_result.claim}")
    print(f"Number of queries generated: {len(analysis_result.queries)}")
    print(f"Total papers found: {len(analysis_result.search_results)}")
    
    # Add this section to print a summary of all search results
    print("\nSearch Results Summary:")
    for i, paper in enumerate(analysis_result.search_results, 1):
        print(f"\n{i}. Title: {paper.title}")
        print(f"   Authors: {', '.join(paper.authors)}")
        print(f"   DOI: {paper.doi}")
        print(f"   Source: {paper.source}")
    
    print(f"\nNumber of ranked papers: {len(analysis_result.ranked_papers)}")
    print("\nTop ranked papers:")
    
    for paper in analysis_result.get_top_papers(analysis_result.parameters["num_papers_to_return"]):
        print(f"\nTitle: {paper.title}")
        print(f"Authors: {', '.join(paper.authors)}")
        print(f"DOI: {paper.doi}")
        print(f"Relevance Score: {paper.relevance_score}")
        print(f"Analysis: {paper.analysis}")
        print("Relevant Quotes:")
        for quote in paper.relevant_quotes:
            print(f"- {quote}")

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())
        </content>
    </file>
    <file>
        <name>models.py</name>
        <path>academic_claim_analyzer\models.py</path>
        <content>
# academic_claim_analyzer/models.py

from dataclasses import dataclass, field, asdict as dataclasses_asdict
from typing import List, Dict, Any, Optional
from datetime import datetime


@dataclass
class SearchQuery:
    """A single query issued to one search backend."""

    # Raw query string sent to the backend.
    query: str
    # Backend identifier, e.g. "openalex" or "scopus".
    source: str
    # NOTE(review): datetime.utcnow is naive and deprecated in 3.12; switching
    # to datetime.now(timezone.utc) changes tz-awareness — confirm before changing.
    timestamp: datetime = field(default_factory=datetime.utcnow)

@dataclass
class Paper:
    """A single academic paper returned by a search backend."""

    title: str
    authors: List[str]
    year: int
    doi: str
    # Abstract text when the backend provides one.
    abstract: Optional[str] = None
    # Name of the backend that produced this hit (e.g. "openalex").
    source: str = ""
    # Full text when retrievable; the ranker filters on its presence/length.
    full_text: Optional[str] = None
    pdf_link: Optional[str] = None
    bibtex: Optional[str] = None
    # Free-form extra data from the backend.
    metadata: Dict[str, Any] = field(default_factory=dict)

@dataclass
class RankedPaper(Paper):
    """A Paper augmented with ranking output."""

    # None until ranking assigns a score, hence Optional (was typed bare float).
    relevance_score: Optional[float] = None
    relevant_quotes: List[str] = field(default_factory=list)
    analysis: str = ""
    # Narrowed from Paper's Optional[str]: ranked papers default to "".
    bibtex: str = ""
        
@dataclass
class ClaimAnalysis:
    """Aggregate state for one claim's analysis pipeline.

    Collects generated queries, raw search results, ranked papers, and
    free-form metadata (e.g. error details recorded by analyze_claim).
    """

    claim: str
    timestamp: datetime = field(default_factory=datetime.utcnow)
    parameters: Dict[str, Any] = field(default_factory=dict)
    queries: List[SearchQuery] = field(default_factory=list)
    search_results: List[Paper] = field(default_factory=list)
    ranked_papers: List[RankedPaper] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def add_query(self, query: str, source: str):
        """Record a query issued against the backend named by *source*."""
        self.queries.append(SearchQuery(query, source))

    def add_search_result(self, paper: Paper):
        """Append one raw search hit."""
        self.search_results.append(paper)

    def add_ranked_paper(self, paper: RankedPaper):
        """Append one ranked paper."""
        self.ranked_papers.append(paper)

    def get_top_papers(self, n: int) -> List[RankedPaper]:
        """Return the *n* highest-scoring papers; unscored papers sort last.

        RankedPaper.relevance_score defaults to None before scoring; the
        previous implementation raised TypeError comparing None with floats.
        """
        def _score(paper: RankedPaper) -> float:
            score = paper.relevance_score
            return score if score is not None else float("-inf")

        return sorted(self.ranked_papers, key=_score, reverse=True)[:n]

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to plain dicts/lists with ISO-formatted datetimes."""
        def _serialize(obj):
            if isinstance(obj, datetime):
                return obj.isoformat()
            elif hasattr(obj, 'to_dict'):
                return obj.to_dict()
            elif isinstance(obj, list):
                return [_serialize(item) for item in obj]
            elif isinstance(obj, dict):
                return {key: _serialize(value) for key, value in obj.items()}
            else:
                return obj

        return {key: _serialize(value) for key, value in dataclasses_asdict(self).items()}
        </content>
    </file>
    <file>
        <name>paper_ranker.py</name>
        <path>academic_claim_analyzer\paper_ranker.py</path>
        <content>
# academic_claim_analyzer/paper_ranker.py

import asyncio
import json
import random
from typing import List, Dict
from .models import Paper, RankedPaper
from async_llm_handler import Handler
from .search.bibtex import get_bibtex_from_doi, get_bibtex_from_title
import logging


logger = logging.getLogger(__name__)

# Prompt for the group-ranking pass: the model must return a JSON object with
# a "rankings" list assigning each paper a unique rank 1..num_papers.
# str.format placeholders: {claim}, {paper_summaries}, {num_papers}; literal
# JSON braces are doubled to survive formatting.
RANKING_PROMPT = """
Analyze the relevance of the following papers to the query: "{claim}"

Papers:
{paper_summaries}

Rank these papers from most to least relevant based on the following criteria:
1. Direct relevance to the claim (either supporting or refuting it)
2. Quality and reliability of the research
3. Recency and impact of the findings
4. Prescence of relevant information. If methods or results section are not present in full detail, the paper cannot be considered evaluative of the claim and should be ranked lower.

Focus primarily on the full text content of each paper. Other metadata (title, authors, etc.) may be missing or incomplete, but should not significantly affect your ranking if the full text is present.

Your response should be in the following JSON format:
{{
  "rankings": [
    {{
      "paper_id": "string",
      "rank": integer,
      "explanation": "string"
    }},
    ...
  ]
}}

Ensure that each paper is assigned a unique rank from 1 to {num_papers}, where 1 is the most relevant. Provide a concise, technical explanation for each ranking, focusing on how the paper's content directly addresses the claim.
"""

# Prompt for per-paper analysis: the model must return JSON with "analysis"
# and exactly three "relevant_quotes".
# str.format placeholders: {claim}, {full_text}, {title}, {authors}, {year},
# {doi}, {abstract}.
ANALYSIS_PROMPT = """
Provide a detailed, technical analysis of the following paper's relevance to the query: "{claim}"

Paper Full Text: {full_text}

Additional metadata (if available):
Title: {title}
Authors: {authors}
Publication Year: {year}
DOI: {doi}
Abstract: {abstract}

Your response must be in the following JSON format:
{{
  "analysis": "string",
  "relevant_quotes": [
    "string",
    "string",
    "string"
  ]
}}

In the analysis:
1. Evaluate how directly the paper addresses the claim, either supporting or refuting it.
2. Assess the methodology, sample size, and statistical significance of the findings.
3. Consider any limitations or potential biases in the study.
4. Discuss how the paper's findings contribute to the broader understanding of the claim.

Extract exactly three relevant quotes from the paper that best support your analysis. These should be verbatim excerpts that directly relate to the claim.

Ensure your analysis is highly precise, technical, and grounded in the paper's content. Avoid general statements and focus on specific details from the study.
"""


def create_balanced_groups(papers: List[Paper], min_group_size: int = 2, max_group_size: int = 5) -> List[List[Paper]]:
    """Create balanced groups of papers, ensuring each group has at least min_group_size papers.

    Returns a single group when there are fewer than min_group_size papers,
    and falls back to a single group on any unexpected error.
    """
    num_papers = len(papers)
    logger.info(f"Creating balanced groups for {num_papers} papers")
    logger.info(f"min_group_size: {min_group_size}, max_group_size: {max_group_size}")

    if num_papers < min_group_size:
        logger.warning(f"Too few papers ({num_papers}) to create groups. Returning single group.")
        return [papers]

    try:
        if num_papers < max_group_size:
            logger.info(f"Number of papers ({num_papers}) less than max_group_size ({max_group_size}). Using num_papers as group size.")
            group_size = num_papers
        else:
            # num_papers >= max_group_size here, so this floor division is >= 1;
            # the former `inner_division == 0` fallback branch was unreachable
            # and has been removed.
            inner_division = num_papers // max_group_size
            logger.info(f"Inner division result: {inner_division}")
            group_size = min(max_group_size, max(min_group_size, num_papers // inner_division))

        logger.info(f"Calculated group size: {group_size}")

        # Slice the papers into consecutive chunks of group_size.
        groups = [papers[i:i+group_size] for i in range(0, num_papers, group_size)]

        # Fold an undersized trailing group back into the earlier groups.
        if len(groups[-1]) < min_group_size:
            leftover = groups.pop()
            for idx, leftover_paper in enumerate(leftover):
                groups[idx % len(groups)].append(leftover_paper)

        logger.info(f"Created {len(groups)} groups")
        return groups

    except Exception as e:
        logger.error(f"Error in create_balanced_groups: {str(e)}")
        logger.error(f"Falling back to single group")
        return [papers]

async def retry_llm_query(handler: Handler, prompt: str, model: str, max_retries: int = 3) -> Dict[str, any]:
    """Retry LLM query with error handling and JSON parsing.

    Retries only on JSON decode failures; any other exception is logged and
    re-raised immediately. Raises ValueError after max_retries bad payloads.

    NOTE(review): 'any' in the return annotation is the builtin function, not
    typing.Any — likely a typo (Any is not imported in this module).
    """
    for attempt in range(max_retries):
        try:
            response = await handler.query(prompt, model=model, json_mode=True)
            if isinstance(response, str):
                return json.loads(response)
            return response
        except json.JSONDecodeError:
            # `response` is bound here: json.loads only runs after assignment.
            logger.warning(f"Attempt {attempt + 1}/{max_retries}: Failed to parse LLM response as JSON.")
            if attempt == max_retries - 1:
                logger.error(f"All attempts failed. Last response: {response}")
                raise ValueError("Failed to get valid JSON response after multiple attempts")
        except Exception as e:
            logger.error(f"Error during LLM query: {str(e)}")
            raise

async def rank_group(handler: Handler, claim: str, papers: List[Paper]) -> List[Dict[str, any]]:
    """Rank a group of papers using the LLM.

    Builds truncated per-paper summaries (500 chars of full text, 200 of
    abstract), queries the model, then validates that exactly one ranking
    came back per paper. Returns [] on any failure.
    """
    paper_summaries = "\n".join([
        f"Paper ID: {paper.id}\n"
        f"Full Text: {getattr(paper, 'full_text', 'N/A')[:500]}...\n"
        f"Title: {getattr(paper, 'title', 'N/A')}\n"
        f"Abstract: {getattr(paper, 'abstract', 'N/A')[:200]}..."
        for paper in papers
    ])
    prompt = RANKING_PROMPT.format(claim=claim, paper_summaries=paper_summaries, num_papers=len(papers))
    
    try:
        rankings = await retry_llm_query(handler, prompt, model="gpt_4o_mini")
        # NOTE(review): debug print left in library code — prefer logger.debug.
        print(f"Group Rankings: {rankings}")
        
        # Unwrap the {"rankings": [...]} envelope requested by the prompt.
        if isinstance(rankings, dict) and "rankings" in rankings:
            rankings = rankings["rankings"]
        
        if not isinstance(rankings, list) or len(rankings) != len(papers):
            logger.warning(f"Unexpected rankings format. Expected list of {len(papers)} items, got: {rankings}")
            raise ValueError("Unexpected rankings format")
        
        return rankings
    except Exception as e:
        logger.error(f"Error during ranking: {str(e)}")
        return []

async def analyze_paper(handler: Handler, claim: str, paper: Paper) -> Dict[str, any]:
    """Analyze a single paper for relevance and extract quotes.

    Returns the model's {"analysis": ..., "relevant_quotes": [...]} dict, or
    an empty-analysis fallback on any failure.
    """
    prompt = ANALYSIS_PROMPT.format(
        claim=claim,
        full_text=getattr(paper, 'full_text', 'N/A'),
        title=getattr(paper, 'title', 'N/A'),
        authors=getattr(paper, 'authors', 'N/A'),
        year=getattr(paper, 'year', 'N/A'),
        doi=getattr(paper, 'doi', 'N/A'),
        abstract=getattr(paper, 'abstract', 'N/A')
    )
    
    try:
        analysis = await retry_llm_query(handler, prompt, model="gpt_4o_mini")
        # NOTE(review): debug print left in library code — prefer logger.debug.
        print(f"Paper Analysis: {analysis}")
        
        if not isinstance(analysis, dict) or 'analysis' not in analysis or 'relevant_quotes' not in analysis:
            logger.warning("Incomplete analysis received")
            raise ValueError("Incomplete analysis received")
        
        return analysis
    except Exception as e:
        logger.error(f"Error during paper analysis: {str(e)}")
        return {"analysis": "", "relevant_quotes": []}

async def rank_papers(papers: List[Paper], claim: str, num_rounds: int = 3, top_n: int = 5) -> List[RankedPaper]:
    """Rank papers based on their relevance to the given claim.

    Papers are filtered (full text of at least 200 words), de-duplicated by
    DOI/title, then scored over several shuffled group-ranking rounds. The
    top_n papers by average score are analyzed in depth and returned.

    Args:
        papers: Candidate papers to rank.
        claim: The claim papers are ranked against.
        num_rounds: Number of independent ranking rounds to average over.
        top_n: Number of top-scoring papers to analyze and return.

    Returns:
        RankedPaper objects sorted from most to least relevant (may be empty
        if no paper survives filtering).
    """
    handler = Handler()
    
    logger.info(f"Starting to rank {len(papers)} papers")

    # Filter out papers with no full text or full text shorter than 200 words
    valid_papers = [paper for paper in papers if getattr(paper, 'full_text', '') and len(getattr(paper, 'full_text', '').split()) >= 200]
    logger.info(f"After filtering, {len(valid_papers)} valid papers remain")
    
    # Remove duplicates based on DOI or title
    unique_papers = []
    seen_dois = set()
    seen_titles = set()
    for paper in valid_papers:
        if getattr(paper, 'doi', None) and paper.doi not in seen_dois:
            seen_dois.add(paper.doi)
            unique_papers.append(paper)
        elif getattr(paper, 'title', None) and paper.title not in seen_titles:
            seen_titles.add(paper.title)
            unique_papers.append(paper)
    
    # Assign unique IDs to papers if not already present
    for i, paper in enumerate(unique_papers):
        if not hasattr(paper, 'id'):
            setattr(paper, 'id', f"paper_{i}")
    
    paper_scores: Dict[str, List[float]] = {paper.id: [] for paper in unique_papers}
    
    # 'round_num' avoids shadowing the builtin round().
    for round_num in range(num_rounds):
        logger.info(f"Starting ranking round {round_num + 1} of {num_rounds}")
        shuffled_papers = random.sample(unique_papers, len(unique_papers))
        
        # Create balanced groups
        paper_groups = create_balanced_groups(shuffled_papers)
        
        # Rank each group concurrently
        ranking_tasks = [rank_group(handler, claim, group) for group in paper_groups]
        group_rankings = await asyncio.gather(*ranking_tasks)
        
        # Accumulate scores
        for rankings in group_rankings:
            group_size = len(rankings)
            for ranking in rankings:
                paper_id = ranking['paper_id']
                rank = ranking['rank']
                # Guard against IDs the LLM invented that were never assigned
                # (previously a KeyError that aborted the whole ranking).
                if paper_id not in paper_scores:
                    logger.warning(f"Ignoring ranking for unknown paper id: {paper_id}")
                    continue
                # Normalize score based on group size so groups of different
                # sizes contribute comparable scores in (0, 1].
                score = (group_size - rank + 1) / group_size
                paper_scores[paper_id].append(score)
    
    # Calculate average scores, handling papers that never received a score
    average_scores = {}
    for paper_id, scores in paper_scores.items():
        if scores:
            average_scores[paper_id] = sum(scores) / len(scores)
        else:
            logger.warning(f"No scores recorded for paper {paper_id}. Assigning lowest score.")
            average_scores[paper_id] = 0
    
    # Sort papers by average score, best first
    sorted_papers = sorted(unique_papers, key=lambda p: average_scores[p.id], reverse=True)
    
    # Analyze top N papers concurrently
    top_papers = sorted_papers[:top_n]
    analysis_tasks = [analyze_paper(handler, claim, paper) for paper in top_papers]
    paper_analyses = await asyncio.gather(*analysis_tasks)
    
    # Create RankedPaper objects
    ranked_papers = []
    for paper, analysis in zip(top_papers, paper_analyses):
        paper_dict = paper.__dict__.copy()
        paper_dict.pop('id', None)  # 'id' is internal bookkeeping, not a RankedPaper field
        ranked_paper = RankedPaper(
            **paper_dict,
            relevance_score=average_scores[paper.id],
            analysis=analysis['analysis'],
            relevant_quotes=analysis['relevant_quotes']
        )
        # Generate BibTeX, preferring the DOI lookup and falling back to a title search
        bibtex = get_bibtex_from_doi(ranked_paper.doi) if ranked_paper.doi else None
        if not bibtex:
            bibtex = get_bibtex_from_title(ranked_paper.title, ranked_paper.authors, ranked_paper.year)
        ranked_paper.bibtex = bibtex or ""
        ranked_papers.append(ranked_paper)
    
    # Guard before indexing: an empty result previously raised IndexError here.
    if ranked_papers:
        logger.info(f"Completed paper ranking. Top score: {ranked_papers[0].relevance_score:.2f}, Bottom score: {ranked_papers[-1].relevance_score:.2f}")
    else:
        logger.warning("Paper ranking completed with no rankable papers.")
    return ranked_papers
        </content>
    </file>
    <file>
        <name>paper_scraper.py</name>
        <path>academic_claim_analyzer\paper_scraper.py</path>
        <content>
import asyncio
import random
import aiohttp
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
from fake_useragent import UserAgent
import logging
import sys
import json
import fitz  # PyMuPDF
from bs4 import BeautifulSoup
import requests
from urllib.parse import urlparse

class UnifiedWebScraper:
    """Scrape article text from URLs or bare DOIs using multiple strategies.

    Strategies are tried in order (plain HTTP fetch, Playwright-rendered
    page and, for .pdf URLs, direct PDF download) until one yields at least
    the requested word count; otherwise the longest result wins.
    """

    def __init__(self, session, max_concurrent_tasks=5):
        # NOTE(review): the semaphore is created but never acquired anywhere
        # in this class — presumably intended to cap concurrent scrapes;
        # confirm before relying on max_concurrent_tasks.
        self.semaphore = asyncio.Semaphore(max_concurrent_tasks)
        self.user_agent = UserAgent()
        self.browser = None  # lazily created by initialize()
        self.session = session  # shared aiohttp session (used for PDF downloads)
        self.logger = logging.getLogger(__name__)

    async def initialize(self):
        """Start Playwright and launch a headless Chromium browser."""
        try:
            playwright = await async_playwright().start()
            self.browser = await playwright.chromium.launch(headless=True)
            self.logger.info("Browser initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize browser: {str(e)}")
            raise

    async def close(self):
        """Shut down the browser if it was started."""
        if self.browser:
            await self.browser.close()
            self.logger.info("Browser closed")

    def normalize_url(self, url):
        """Expand bare DOIs to doi.org URLs and default to the http scheme."""
        if url.startswith("10.") or url.startswith("doi:"):
            return f"https://doi.org/{url.replace('doi:', '')}"
        parsed = urlparse(url)
        if not parsed.scheme:
            return f"http://{url}"
        return url

    async def scrape(self, url, min_words=700, max_retries=3):
        """Scrape a URL, returning the best text found across all methods.

        Args:
            url: URL or bare DOI to scrape.
            min_words: Word count at which a result is accepted immediately.
            max_retries: Attempts per scraping method.

        Returns:
            The first result meeting min_words, else the longest result
            obtained ("" if every method failed).
        """
        normalized_url = self.normalize_url(url)
        self.logger.info(f"Attempting to scrape URL: {normalized_url}")

        scraping_methods = [
            self.scrape_with_requests,
            self.scrape_with_playwright
        ]

        # Only add PDF scraping method if the URL ends with .pdf
        if normalized_url.lower().endswith('.pdf'):
            scraping_methods.append(self.scrape_pdf)

        best_result = ("", 0)  # (content, word_count) of the best attempt so far
        for method in scraping_methods:
            self.logger.info(f"Trying method: {method.__name__}")
            for attempt in range(max_retries):
                try:
                    self.logger.info(f"Attempt {attempt + 1} with {method.__name__}")
                    content = await method(normalized_url)
                    word_count = len(content.split())
                    self.logger.info(f"Got {word_count} words from {method.__name__}")
                    if word_count > best_result[1]:
                        best_result = (content, word_count)
                    if word_count >= min_words:
                        self.logger.info(f"Successfully scraped URL: {normalized_url}")
                        return content
                except Exception as e:
                    self.logger.error(f"Error in {method.__name__} (attempt {attempt + 1}): {str(e)}")
                if attempt < max_retries - 1:
                    # Jittered backoff to avoid hammering the server.
                    wait_time = random.uniform(1, 3)
                    self.logger.info(f"Waiting {wait_time:.2f} seconds before next attempt")
                    await asyncio.sleep(wait_time)

        self.logger.warning(f"Failed to meet minimum word count for URL: {normalized_url}")
        return best_result[0]

    async def scrape_with_requests(self, url):
        """Fetch the page over plain HTTP and extract its main text content.

        NOTE(review): requests.get is a blocking call inside an async method
        and will stall the event loop for the duration of the request —
        consider switching to the shared aiohttp session.
        """
        self.logger.info(f"Scraping with requests: {url}")
        response = requests.get(url, headers={"User-Agent": self.user_agent.random})
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, "html.parser")
            main_content = soup.find("div", id="abstract") or soup.find("main") or soup.find("body")
            if main_content:
                for script in main_content(["script", "style"]):
                    script.decompose()
                return main_content.get_text(separator="\n", strip=True)
        return ""

    async def scrape_with_playwright(self, url):
        """Render the page in headless Chromium and extract its visible text."""
        self.logger.info(f"Scraping with Playwright: {url}")
        if not self.browser:
            await self.initialize()
        context = await self.browser.new_context(
            user_agent=self.user_agent.random,
            viewport={"width": 1920, "height": 1080},
            ignore_https_errors=True,
        )
        page = await context.new_page()
        try:
            # 15-second navigation timeout (the old comment claiming 90s was wrong).
            await page.goto(url, wait_until="networkidle", timeout=15000)
            content = await self.extract_text_content(page)
            return content
        except PlaywrightTimeoutError:
            self.logger.warning(f"Timeout occurred while loading {url}")
            return ""
        finally:
            await page.close()
            # Close the context as well; previously it was leaked on every
            # call, accumulating browser resources across the many URLs a
            # single scraper instance processes.
            await context.close()

    async def scrape_pdf(self, url):
        """Download a PDF via the shared aiohttp session and extract its text."""
        self.logger.info(f"Scraping PDF: {url}")
        async with self.session.get(url) as response:
            if response.status == 200:
                pdf_bytes = await response.read()
                return self.extract_text_from_pdf(pdf_bytes)
        return ""

    async def extract_text_content(self, page):
        """Collect the text of common content elements from a rendered page."""
        try:
            await page.wait_for_selector("body", timeout=10000)
            text_content = await page.evaluate("""
                () => {
                    const elements = document.querySelectorAll('p, h1, h2, h3, h4, h5, h6, li, td, th');
                    return Array.from(elements).map(element => element.innerText).join(' ');
                }
            """)
            return text_content.strip()
        except Exception as e:
            self.logger.error(f"Failed to extract text content. Error: {str(e)}")
            return ""

    def extract_text_from_pdf(self, pdf_bytes):
        """Extract plain text from in-memory PDF bytes with PyMuPDF."""
        try:
            document = fitz.open("pdf", pdf_bytes)
            text = ""
            for page in document:
                text += page.get_text()
            return text.strip()
        except Exception as e:
            self.logger.error(f"Failed to extract text from PDF. Error: {str(e)}")
            return ""

async def main():
    """Scrape a list of DOIs concurrently and print a success/failure report."""
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        handlers=[
            logging.FileHandler("scraper.log"),
            logging.StreamHandler(sys.stdout),
        ],
    )

    async with aiohttp.ClientSession() as session:
        scraper = UnifiedWebScraper(session=session)
        try:
            await scraper.initialize()
        except Exception as e:
            logging.error(f"Initialization failed: {e}")
            return

        urls = [
            "10.1016/j.ifacol.2020.12.237",
            "10.1016/j.agwat.2023.108536",
            "10.1016/j.atech.2023.100251",
            "10.1016/j.atech.2023.100179",
            "10.1016/j.ifacol.2023.10.677",
            "10.1016/j.ifacol.2023.10.1655",
            "10.1016/j.ifacol.2023.10.667",
            "10.1002/cjce.24764",
            "10.3390/app13084734",
            "10.1016/j.atech.2022.100074",
            "10.1007/s10668-023-04028-9",
            "10.1109/IJCNN54540.2023.10191862",
            "10.1201/9780429290152-5",
            "10.1016/j.jprocont.2022.10.003",
            "10.1016/j.rser.2022.112790",
            "10.1007/s11269-022-03191-4",
            "10.3390/app12094235",
            "10.3390/w14060889",
            "10.3390/su14031304",
        ]

        try:
            scrape_tasks = [asyncio.create_task(scraper.scrape(url)) for url in urls]
            scraped_contents = await asyncio.gather(*scrape_tasks)

            success_count = 0
            failure_count = 0

            print("\nScraping Results:\n" + "=" * 80)
            for url, content in zip(urls, scraped_contents):
                word_count = len(content.split())
                if word_count >= 700:
                    first_100_words = " ".join(content.split()[:100])
                    print(f"\nURL: {url}\nStatus: Success\nWord count: {word_count}\nFirst 100 words: {first_100_words}\n" + "-" * 80)
                    success_count += 1
                else:
                    print(f"\nURL: {url}\nStatus: Failure (insufficient words)\nWord count: {word_count}\n" + "-" * 80)
                    failure_count += 1

            print("\nSummary:\n" + "=" * 80)
            print(f"Total URLs scraped: {len(urls)}")
            print(f"Successful scrapes: {success_count}")
            print(f"Failed scrapes: {failure_count}")
        finally:
            # Always release the browser, even if scraping or reporting raises
            # (previously a failure here leaked the headless Chromium process).
            await scraper.close()

if __name__ == "__main__":
    asyncio.run(main())
        </content>
    </file>
    <file>
        <name>query_formulator.py</name>
        <path>academic_claim_analyzer\query_formulator.py</path>
        <content>
# src/academic_claim_analyzer/query_formulator.py

from typing import List
import json
from async_llm_handler import Handler

# Prompt guidance describing Scopus advanced-search syntax. Interpolated
# verbatim into GENERATE_QUERIES as {SEARCH_GUIDANCE}; this is a runtime
# string consumed by the LLM, so its text must not be reformatted casually.
SCOPUS_SEARCH_GUIDE = """
Syntax and Operators

Valid syntax for advanced search queries includes:

Field codes (e.g. TITLE, ABS, KEY, AUTH, AFFIL) to restrict searches to specific parts of documents
Boolean operators (AND, OR, AND NOT) to combine search terms
Proximity operators (W/n, PRE/n) to find words within a specified distance - W/n: Finds terms within "n" words of each other, regardless of order. Example: journal W/15 publishing finds articles where "journal" and "publishing" are within two words of each other. - PRE/n: Finds terms in the specified order and within "n" words of each other. Example: data PRE/50 analysis finds articles where "data" appears before "analysis" within three words. - To find terms in the same sentence, use 15. To find terms in the same paragraph, use 50 -
Quotation marks for loose/approximate phrase searches
Braces {} for exact phrase searches
Wildcards (*) to capture variations of search terms
Invalid syntax includes:

Mixing different proximity operators (e.g. W/n and PRE/n) in the same expression
Using wildcards or proximity operators with exact phrase searches
Placing AND NOT before other Boolean operators
Using wildcards on their own without any search terms
Ideal Search Structure

An ideal advanced search query should:

Use field codes to focus the search on the most relevant parts of documents
Combine related concepts using AND and OR
Exclude irrelevant terms with AND NOT at the end
Employ quotation marks and braces appropriately for phrase searching
Include wildcards to capture variations of key terms (while avoiding mixing them with other operators)
Follow the proper order of precedence for operators
Complex searches should be built up systematically, with parentheses to group related expressions as needed. The information from the provided documents on syntax rules and operators should be applied rigorously.

** Critical: all double quotes other than the outermost ones should be preceded by a backslash (") to escape them in the JSON format. Failure to do so will result in an error when parsing the JSON string. **

Example Advanced Searches

{
  "queries": [
    "TITLE-ABS-KEY((\"precision agriculture\" OR \"precision farming\") AND (\"machine learning\" OR \"AI\") AND \"water\")",
    "TITLE-ABS-KEY((iot OR \"internet of things\") AND (irrigation OR watering) AND sensor*)",
    "TITLE-ABS-Key((\"precision farming\" OR \"precision agriculture\") AND (\"deep learning\" OR \"neural networks\") AND \"water\")",
    "TITLE-ABS-KEY((crop W/5 monitor*) AND \"remote sensing\" AND (irrigation OR water*))",
    "TITLE(\"precision irrigation\" OR \"variable rate irrigation\" AND \"machine learning\")"
  ]
}


** Critical: all double quotes other than the outermost ones should be preceded by a backslash (") to escape them in the JSON format. Failure to do so will result in an error when parsing the JSON string. **. 

These example searches demonstrate different ways to effectively combine key concepts related to precision agriculture, irrigation, real-time monitoring, IoT, machine learning and related topics using advanced search operators. They make use of field codes, Boolean and proximity operators, phrase searching, and wildcards to construct targeted, comprehensive searches to surface the most relevant research. The topic focus is achieved through carefully chosen search terms covering the desired themes.
"""

# Prompt guidance describing OpenAlex (URL-encoded) search syntax.
# Interpolated verbatim into GENERATE_QUERIES as {SEARCH_GUIDANCE}; this is
# a runtime string consumed by the LLM, so its text is preserved as-is.
OPENALEX_SEARCH_GUIDE = """
Syntax and Operators
Valid syntax for advanced alex search queries includes:
Using quotation marks %22%22 for exact phrase matches
Adding a minus sign - before terms to exclude them
Employing the OR operator in all caps to find pages containing either term
Using the site%3A operator to limit results to a specific website
Applying the filetype%3A operator to find specific file formats like PDF, DOC, etc.
Adding the * wildcard as a placeholder for unknown words
`
Invalid syntax includes:
Putting a plus sign + before words (alex stopped supporting this)
Using other special characters like %3F, %24, %26, %23, etc. within search terms
Explicitly using the AND operator (alex's default behavior makes it redundant)

Ideal Search Structure
An effective alex search query should:
Start with the most important search terms
Use specific, descriptive keywords related to irrigation scheduling, management, and precision irrigation
Utilize exact phrases in %22quotes%22 for specific word combinations
Exclude irrelevant terms using the - minus sign
Connect related terms or synonyms with OR
Apply the * wildcard strategically for flexibility
Note:

By following these guidelines and using proper URL encoding, you can construct effective and accurate search queries for alex.

Searches should be concise yet precise, following the syntax rules carefully. 

Example Searches
{
  "queries": [
    "https://api.openalex.org/works?search=%22precision+irrigation%22+%2B%22soil+moisture+sensors%22+%2B%22irrigation+scheduling%22&sort=relevance_score:desc&per-page=30",
    "https://api.openalex.org/works?search=%22machine+learning%22+%2B%22irrigation+management%22+%2B%22crop+water+demand+prediction%22&sort=relevance_score:desc&per-page=30",
    "https://api.openalex.org/works?search=%22IoT+sensors%22+%2B%22real-time%22+%2B%22soil+moisture+monitoring%22+%2B%22crop+water+stress%22&sort=relevance_score:desc&per-page=30",
    "https://api.openalex.org/works?search=%22remote+sensing%22+%2B%22vegetation+indices%22+%2B%22irrigation+scheduling%22&sort=relevance_score:desc&per-page=30",
    "https://api.openalex.org/works?search=%22wireless+sensor+networks%22+%2B%22precision+agriculture%22+%2B%22variable+rate+irrigation%22+%2B%22irrigation+automation%22&sort=relevance_score:desc&per-page=30"
  ]
}

These example searches demonstrate how to create targeted, effective alex searches. They focus on specific topics, exclude irrelevant results, allow synonym flexibility, and limit to relevant domains when needed. The search terms are carefully selected to balance relevance and specificity while avoiding being overly restrictive.  By combining relevant keywords, exact phrases, and operators, these searches help generate high-quality results for the given topics.
"""

# Master prompt template for query generation. Placeholders {CLAIM},
# {SEARCH_GUIDANCE} and {NUM_QUERIES} are filled by formulate_queries via
# str.format; the template itself contains no literal braces, so .format is safe.
GENERATE_QUERIES = """
You are tasked with generating optimized search queries to find relevant research articles addressing a specific point. Follow these instructions carefully:

1. Review the following claim that needs to be addressed by the literature search:
<claim>
{CLAIM}
</claim>

2. Consider the following search guidance:
<search_guidance>
{SEARCH_GUIDANCE}
</search_guidance>

3. Generate {NUM_QUERIES} highly optimized search queries that would surface the most relevant, insightful, and comprehensive set of research articles to shed light on various aspects of the given point. Your queries should:

- Directly address the key issues and nuances of the point content
- Demonstrate creativity and variety to capture different dimensions of the topic
- Use precise terminology and logical operators for high-quality results
- Cover a broad range of potential subtopics, perspectives, and article types related to the point
- Strictly adhere to any specific requirements provided in the search guidance

4. Provide your response as a list of strings in the following format:

[
  "query_1",
  "query_2",
  "query_3",
  ...
]

Replace query_1, query_2, etc. with your actual search queries. The number of queries should match {NUM_QUERIES}.

5. If the search guidance specifies a particular platform (e.g., Scopus, Web of Science), ensure your queries are formatted appropriately for that platform.

6. Important: If your queries contain quotation marks, ensure they are properly escaped with a backslash (\") to maintain valid list formatting.

Generate the list of search queries now, following the instructions above.
"""




async def formulate_queries(claim: str, num_queries: int, query_type: str) -> List[str]:
    """
    Generate search queries based on the given claim.

    Args:
        claim (str): The claim to generate queries for.
        num_queries (int): The number of queries to generate.
        query_type (str): The type of query to generate ('scopus' or 'openalex').

    Returns:
        List[str]: A list of generated search queries.

    Raises:
        ValueError: If the query type is unsupported, the LLM response cannot
            be parsed, or the number of returned queries does not match
            num_queries.
    """
    handler = Handler()

    if query_type.lower() == 'scopus':
        search_guidance = SCOPUS_SEARCH_GUIDE
    elif query_type.lower() == 'openalex':
        search_guidance = OPENALEX_SEARCH_GUIDE
    else:
        raise ValueError(f"Unsupported query type: {query_type}")

    prompt = GENERATE_QUERIES.format(
        CLAIM=claim,
        SEARCH_GUIDANCE=search_guidance,
        NUM_QUERIES=num_queries,
        QUERY_TYPE=query_type
    )

    response = await handler.query(prompt, model="gpt_4o_mini", json_mode=True)

    # The handler may return either a raw JSON string or an already-parsed dict.
    if isinstance(response, str):
        try:
            parsed_response = json.loads(response)
        except json.JSONDecodeError:
            raise ValueError("Failed to parse LLM response as JSON")
    elif isinstance(response, dict):
        parsed_response = response
    else:
        raise ValueError("Unexpected response format from LLM")

    if "queries" not in parsed_response or not isinstance(parsed_response["queries"], list):
        raise ValueError("Invalid response format: 'queries' list not found")

    queries = parsed_response["queries"]
    # (Removed stray debug print of the raw queries to stdout.)

    if len(queries) != num_queries:
        raise ValueError(f"Expected {num_queries} queries, but got {len(queries)}")

    return queries
        </content>
    </file>
    <file>
        <name>test_scraper.py</name>
        <path>academic_claim_analyzer\test_scraper.py</path>
        <content>
import os
import re
import asyncio
import random
import aiohttp
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
from fake_useragent import UserAgent
import logging
import sys
import json
import fitz  # PyMuPDF
from bs4 import BeautifulSoup
import requests
from urllib.parse import urlparse
from markdownify import markdownify as md

class UnifiedWebScraper:
    """Scrape article text from URLs or bare DOIs, converting HTML to Markdown.

    Tries a cheap HTTP fetch first, then a full Playwright browser render,
    and direct PDF extraction for .pdf URLs, keeping the longest result.
    """

    def __init__(self, session, max_concurrent_tasks=5):
        # NOTE(review): the semaphore is created but never acquired anywhere
        # in this class — presumably intended to cap concurrent scrapes.
        self.semaphore = asyncio.Semaphore(max_concurrent_tasks)
        self.user_agent = UserAgent()
        self.browser = None  # lazily created by initialize()
        self.session = session  # shared aiohttp session (used for PDF downloads)
        self.logger = logging.getLogger(__name__)

    async def initialize(self):
        """Start Playwright and launch a headless Chromium browser."""
        try:
            playwright = await async_playwright().start()
            self.browser = await playwright.chromium.launch(headless=True)
            self.logger.info("Browser initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize browser: {str(e)}")
            raise

    async def close(self):
        """Shut down the browser if it was started."""
        if self.browser:
            await self.browser.close()
            self.logger.info("Browser closed")

    def normalize_url(self, url):
        """Expand bare DOIs to doi.org URLs and default to the http scheme."""
        if url.startswith("10.") or url.startswith("doi:"):
            return f"https://doi.org/{url.replace('doi:', '')}"
        parsed = urlparse(url)
        if not parsed.scheme:
            return f"http://{url}"
        return url

    async def scrape(self, url, min_words=700, max_retries=3):
        """Scrape a URL, returning the first result with >= min_words words,
        or the longest result obtained across all methods and retries."""
        normalized_url = self.normalize_url(url)
        self.logger.info(f"Attempting to scrape URL: {normalized_url}")

        scraping_methods = [
            self.scrape_with_requests,
            self.scrape_with_playwright
        ]

        # PDF extraction only applies when the URL points directly at a PDF.
        if normalized_url.lower().endswith('.pdf'):
            scraping_methods.append(self.scrape_pdf)

        best_result = ("", 0)  # (content, word_count) of the best attempt so far
        for method in scraping_methods:
            self.logger.info(f"Trying method: {method.__name__}")
            for attempt in range(max_retries):
                try:
                    self.logger.info(f"Attempt {attempt + 1} with {method.__name__}")
                    content = await method(normalized_url)
                    word_count = len(content.split())
                    self.logger.info(f"Got {word_count} words from {method.__name__}")
                    if word_count > best_result[1]:
                        best_result = (content, word_count)
                    if word_count >= min_words:
                        self.logger.info(f"Successfully scraped URL: {normalized_url}")
                        return content
                except Exception as e:
                    self.logger.error(f"Error in {method.__name__} (attempt {attempt + 1}): {str(e)}")
                if attempt < max_retries - 1:
                    # Jittered backoff to avoid hammering the server.
                    wait_time = random.uniform(1, 3)
                    self.logger.info(f"Waiting {wait_time:.2f} seconds before next attempt")
                    await asyncio.sleep(wait_time)

        self.logger.warning(f"Failed to meet minimum word count for URL: {normalized_url}")
        return best_result[0]

    async def scrape_with_requests(self, url):
        """Fetch the page over plain HTTP and return it converted to Markdown.

        NOTE(review): requests.get is a blocking call inside an async method
        and will stall the event loop while the request runs.
        """
        self.logger.info(f"Scraping with requests: {url}")
        response = requests.get(url, headers={"User-Agent": self.user_agent.random})
        if response.status_code == 200:
            return md(response.text)
        return ""

    async def scrape_with_playwright(self, url):
        """Render the page in headless Chromium and return it as Markdown."""
        self.logger.info(f"Scraping with Playwright: {url}")
        if not self.browser:
            await self.initialize()
        context = await self.browser.new_context(
            user_agent=self.user_agent.random,
            viewport={"width": 1920, "height": 1080},
            ignore_https_errors=True,
        )
        page = await context.new_page()
        try:
            await page.goto(url, wait_until="networkidle", timeout=15000)
            content = await page.content()
            return md(content)
        except PlaywrightTimeoutError:
            self.logger.warning(f"Timeout occurred while loading {url}")
            return ""
        finally:
            await page.close()
            # Close the context as well; previously it was leaked on every
            # call, accumulating browser resources over many URLs.
            await context.close()

    async def scrape_pdf(self, url):
        """Download a PDF via the shared aiohttp session and extract its text."""
        self.logger.info(f"Scraping PDF: {url}")
        async with self.session.get(url) as response:
            if response.status == 200:
                pdf_bytes = await response.read()
                return self.extract_text_from_pdf(pdf_bytes)
        return ""

    def extract_text_from_pdf(self, pdf_bytes):
        """Extract plain text from in-memory PDF bytes with PyMuPDF."""
        try:
            document = fitz.open("pdf", pdf_bytes)
            text = ""
            for page in document:
                text += page.get_text()
            return text.strip()
        except Exception as e:
            self.logger.error(f"Failed to extract text from PDF. Error: {str(e)}")
            return ""

def sanitize_filename(url):
    """Turn a URL into a filesystem-safe Markdown filename.

    Strips the scheme and a leading 'www.', replaces every character outside
    [\\w\\-.] with '_', and truncates so the FINAL name (including the '.md'
    suffix) never exceeds the common 255-character filesystem limit.
    """
    # Remove the protocol and www. if present
    url = re.sub(r'^https?://(www\.)?', '', url)
    # Replace non-alphanumeric characters with underscores
    filename = re.sub(r'[^\w\-_\.]', '_', url)
    # Truncate BEFORE appending '.md': the old code truncated to 255 and then
    # appended the suffix, producing names up to 258 characters.
    return filename[:251] + '.md'

async def main():
    """Scrape a list of DOIs and save each result as a Markdown file."""
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        handlers=[
            logging.FileHandler("scraper.log"),
            logging.StreamHandler(sys.stdout),
        ],
    )

    # NOTE(review): hard-coded, machine-specific output path — consider making
    # this configurable (CLI argument or environment variable).
    output_folder = r"C:\Users\bnsoh2\Desktop\test"
    os.makedirs(output_folder, exist_ok=True)

    async with aiohttp.ClientSession() as session:
        scraper = UnifiedWebScraper(session=session)
        try:
            await scraper.initialize()
        except Exception as e:
            logging.error(f"Initialization failed: {e}")
            return

        urls = [
            "10.1016/j.ifacol.2020.12.237",
            "10.1016/j.agwat.2023.108536",
            "10.1016/j.atech.2023.100251",
            "10.1016/j.atech.2023.100179",
            "10.1016/j.ifacol.2023.10.677",
            "10.1016/j.ifacol.2023.10.1655",
            "10.1016/j.ifacol.2023.10.667",
            "10.1002/cjce.24764",
            "10.3390/app13084734",
            "10.1016/j.atech.2022.100074",
            "10.1007/s10668-023-04028-9",
            "10.1109/IJCNN54540.2023.10191862",
            "10.1201/9780429290152-5",
            "10.1016/j.jprocont.2022.10.003",
            "10.1016/j.rser.2022.112790",
            "10.1007/s11269-022-03191-4",
            "10.3390/app12094235",
            "10.3390/w14060889",
            "10.3390/su14031304",
        ]

        success_count = 0
        failure_count = 0

        try:
            for url in urls:
                try:
                    content = await scraper.scrape(url)
                    word_count = len(content.split())

                    normalized_url = scraper.normalize_url(url)
                    filename = sanitize_filename(normalized_url)
                    file_path = os.path.join(output_folder, filename)

                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(content)

                    # Report the actual saved filename (previously a literal
                    # "(unknown)" placeholder was printed).
                    if word_count >= 700:
                        print(f"\nURL: {url}\nStatus: Success\nWord count: {word_count}\nSaved as: {filename}\n" + "-" * 80)
                        success_count += 1
                    else:
                        print(f"\nURL: {url}\nStatus: Failure (insufficient words)\nWord count: {word_count}\nSaved as: {filename}\n" + "-" * 80)
                        failure_count += 1

                except Exception as e:
                    print(f"\nURL: {url}\nStatus: Error\nError message: {str(e)}\n" + "-" * 80)
                    failure_count += 1

            print("\nSummary:\n" + "=" * 80)
            print(f"Total URLs scraped: {len(urls)}")
            print(f"Successful scrapes: {success_count}")
            print(f"Failed scrapes: {failure_count}")
            print(f"Results saved in: {output_folder}")
        finally:
            # Always release browser resources, even if scraping or reporting fails.
            await scraper.close()

if __name__ == "__main__":
    asyncio.run(main())
        </content>
    </file>
    <file>
        <name>__init__.py</name>
        <path>academic_claim_analyzer\__init__.py</path>
        <content>
# academic_claim_analyzer/__init__.py

from .main import analyze_claim
from .models import ClaimAnalysis
from .batch_processor import batch_analyze_claims, print_results_summary, print_detailed_result, print_schema
        </content>
    </file>
    </directory>
        <directory name="search">
    <file>
        <name>base.py</name>
        <path>academic_claim_analyzer\search\base.py</path>
        <content>
# src/academic_claim_analyzer/search/base.py

from abc import ABC, abstractmethod
from typing import List
from ..models import Paper

class BaseSearch(ABC):
    """Abstract interface that every literature-search backend implements."""

    @abstractmethod
    async def search(self, query: str, limit: int) -> List[Paper]:
        """Run the query against the backend and collect matching papers.

        Args:
            query (str): The search query.
            limit (int): The maximum number of results to return.

        Returns:
            List[Paper]: A list of search results.
        """
        ...
        </content>
    </file>
    <file>
        <name>bibtex.py</name>
        <path>academic_claim_analyzer\search\bibtex.py</path>
        <content>
# academic_claim_analyzer/search/bibtex.py

import requests
from typing import Optional

def get_bibtex_from_doi(doi: str, timeout: float = 10.0) -> Optional[str]:
    """
    Fetch BibTeX data for a given DOI using the Crossref API.

    Args:
        doi: The DOI to resolve.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The BibTeX record as text, or None if the lookup fails for any reason.
    """
    url = f"https://api.crossref.org/works/{doi}/transform/application/x-bibtex"
    try:
        # A timeout prevents a stalled connection from hanging the caller;
        # network errors degrade to None, matching the non-200 behavior below.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None

def get_bibtex_from_title(title: str, authors: list, year: int, timeout: float = 10.0) -> Optional[str]:
    """
    Search Crossref for a paper by title/authors/year and fetch its BibTeX data.

    Args:
        title: Paper title.
        authors: Author name fragments to include in the search query.
        year: Publication year.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        The BibTeX record of the best match, or None if no match is found
        or the request fails.
    """
    query = f"{title} {' '.join(authors)} {year}"
    try:
        # Let requests build and encode the query string; interpolating the
        # free-text query into the URL could corrupt it on characters such
        # as '&' or '#'.
        response = requests.get(
            "https://api.crossref.org/works",
            params={"query": query, "rows": 1},
            timeout=timeout,
        )
    except requests.RequestException:
        return None
    if response.status_code == 200:
        data = response.json()
        if data['message']['items']:
            doi = data['message']['items'][0].get('DOI')
            if doi:
                return get_bibtex_from_doi(doi)
    return None

# Test with main code
def main():
    """Exercise both BibTeX lookup helpers with example inputs."""
    # Example 1: resolve a known DOI directly.
    example_doi = "10.1016/j.ifacol.2020.12.237"
    print("BibTeX from DOI:")
    print(get_bibtex_from_doi(example_doi))
    print("\n")

    # Example 2: resolve via a title/authors/year search.
    example_title = "Optimal control of greenhouse climate using PID and MPC algorithms"
    example_authors = ["Hasni", "A.", "Taibi", "R.", "Draoui", "B.", "Boulard", "T."]
    example_year = 2020
    print("BibTeX from title, authors, and year:")
    print(get_bibtex_from_title(example_title, example_authors, example_year))

if __name__ == "__main__":
    main()
        </content>
    </file>
    <file>
        <name>core_search.py</name>
        <path>academic_claim_analyzer\search\core_search.py</path>
        <content>
# src/academic_claim_analyzer/search/core_search.py

import aiohttp
import os
from typing import List
from dotenv import load_dotenv
from .base import BaseSearch
from ..models import Paper
import logging

logger = logging.getLogger(__name__)

load_dotenv()

class CORESearch(BaseSearch):
    """Search client for the CORE v3 API (https://api.core.ac.uk/v3)."""

    def __init__(self):
        # Fail fast when the key is missing instead of failing at request time.
        self.api_key = os.getenv("CORE_API_KEY")
        if not self.api_key:
            raise ValueError("CORE_API_KEY not found in environment variables")
        self.base_url = "https://api.core.ac.uk/v3"

    async def search(self, query: str, limit: int) -> List[Paper]:
        """Query the CORE works search endpoint and return parsed Papers.

        Args:
            query: The search expression.
            limit: Maximum number of works to request.

        Returns:
            Parsed Paper objects; an empty list on any HTTP or network
            failure (best-effort, matching the other search backends).
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Accept": "application/json",
        }

        params = {
            "q": query,
            "limit": limit,
        }

        # Bound the request so a stalled connection cannot hang the caller
        # indefinitely (consistent with OpenAlexSearch's timeout usage).
        timeout = aiohttp.ClientTimeout(total=600)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            try:
                async with session.post(f"{self.base_url}/search/works", headers=headers, json=params) as response:
                    if response.status == 200:
                        logger.info("CORE API request successful.")
                        data = await response.json()
                        return self._parse_results(data)
                    else:
                        logger.error(f"CORE API request failed with status code: {response.status}")
                        return []
            except Exception as e:
                logger.error(f"Error occurred while making CORE API request: {str(e)}")
                return []

    def _parse_results(self, data: dict) -> List[Paper]:
        """Convert a CORE search response payload into Paper objects."""
        results = []
        for entry in data.get("results", []):
            # Author entries are expected to be {"name": ...} dicts (see the
            # test fixtures); .get keeps one malformed entry from aborting
            # the whole result set.
            authors = [author.get("name", "") for author in entry.get("authors", [])]
            result = Paper(
                doi=entry.get("doi", ""),
                title=entry.get("title", ""),
                authors=authors,
                year=entry.get("publicationYear", 0),
                abstract=entry.get("abstract", ""),
                pdf_link=entry.get("downloadUrl", ""),
                source=entry.get("publisher", ""),
                full_text=entry.get("fullText", ""),
                metadata={
                    "citation_count": entry.get("citationCount", 0),
                    "core_id": entry.get("id", "")
                }
            )
            results.append(result)
        return results
        </content>
    </file>
    <file>
        <name>openalex_search.py</name>
        <path>academic_claim_analyzer\search\openalex_search.py</path>
        <content>
# academic_claim_analyzer/search/openalex_search.py

import aiohttp
import asyncio
import urllib.parse
from typing import List
from .base import BaseSearch
from ..models import Paper
from ..paper_scraper import UnifiedWebScraper
import logging

logger = logging.getLogger(__name__)

class OpenAlexSearch(BaseSearch):
    """Search client for the OpenAlex works API."""

    def __init__(self, email: str):
        self.base_url = "https://api.openalex.org"
        # Sent as `mailto=` on each request (OpenAlex polite-pool usage).
        self.email = email
        self.semaphore = asyncio.Semaphore(5)  # Limit to 5 concurrent requests

    async def search(self, query: str, limit: int) -> List[Paper]:
        """Run *query* against OpenAlex and return up to *limit* parsed Papers.

        Returns an empty list on any HTTP or network failure.
        """
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=600)) as session:
            encoded_query = urllib.parse.quote(query)
            search_url = f"{self.base_url}/works?search={encoded_query}&per_page={limit}&mailto={self.email}"

            async with self.semaphore:
                try:
                    await asyncio.sleep(0.2)  # Rate limiting
                    async with session.get(search_url) as response:
                        if response.status == 200:
                            data = await response.json()
                            return await self._parse_results(data, session)
                        else:
                            logger.error(f"Unexpected status code from OpenAlex API: {response.status}")
                            return []
                except Exception as e:
                    logger.error(f"Error occurred while making request to OpenAlex API: {str(e)}")
                    return []

    async def _parse_results(self, data: dict, session: aiohttp.ClientSession) -> List[Paper]:
        """Convert an OpenAlex response into Papers, scraping full text per work."""
        results = []
        scraper = UnifiedWebScraper(session)
        for work in data.get("results", []):
            # OpenAlex serializes a missing location as null, so
            # work["primary_location"] can be None (not just absent);
            # `.get("primary_location", {})` alone would then raise
            # AttributeError on the chained .get. Same for nested "source".
            location = work.get("primary_location") or {}
            location_source = location.get("source") or {}
            result = Paper(
                doi=work.get("doi", ""),
                title=work.get("title", ""),
                authors=[
                    (authorship.get("author") or {}).get("display_name", "")
                    for authorship in work.get("authorships", [])
                ],
                year=work.get("publication_year", 0),
                # NOTE(review): OpenAlex normally exposes only
                # abstract_inverted_index, not a plain "abstract" — confirm
                # which field this deployment relies on.
                abstract=work.get("abstract"),
                pdf_link=location.get("pdf_url"),
                source=location_source.get("display_name", ""),
                metadata={
                    "citation_count": work.get("cited_by_count", 0),
                    "openalex_id": work.get("id", "")
                }
            )
            try:
                if result.doi:
                    result.full_text = await scraper.scrape(f"https://doi.org/{result.doi}")
                if not result.full_text and result.pdf_link:
                    result.full_text = await scraper.scrape(result.pdf_link)
            except Exception as e:
                logger.error(f"Error scraping full text for {result.doi or result.pdf_link}: {str(e)}")
            results.append(result)
        await scraper.close()
        return results
        </content>
    </file>
    <file>
        <name>scopus_search.py</name>
        <path>academic_claim_analyzer\search\scopus_search.py</path>
        <content>
# academic_claim_analyzer/search/scopus_search.py

import aiohttp
import asyncio
import os
from typing import List
from collections import deque
import time
from dotenv import load_dotenv
from .base import BaseSearch
from ..models import Paper
from ..paper_scraper import UnifiedWebScraper
import logging

logger = logging.getLogger(__name__)

load_dotenv()

class ScopusSearch(BaseSearch):
    """Search client for the Elsevier Scopus Search API."""

    def __init__(self):
        # Fail fast when the key is missing instead of failing at request time.
        self.api_key = os.getenv("SCOPUS_API_KEY")
        if not self.api_key:
            raise ValueError("SCOPUS_API_KEY not found in environment variables")
        # Use HTTPS: each request carries the API key in a header, which must
        # not travel over plaintext HTTP.
        self.base_url = "https://api.elsevier.com/content/search/scopus"
        self.request_times = deque(maxlen=6)
        self.semaphore = asyncio.Semaphore(5)  # Limit to 5 concurrent requests

    async def search(self, query: str, limit: int) -> List[Paper]:
        """Run *query* against Scopus and return up to *limit* parsed Papers.

        Returns an empty list on any HTTP or network failure.
        """
        headers = {
            "X-ELS-APIKey": self.api_key,
            "Accept": "application/json",
        }

        params = {
            "query": query,
            "count": limit,
            "view": "COMPLETE",
        }

        async with aiohttp.ClientSession() as session:
            async with self.semaphore:
                try:
                    # Ensure compliance with the rate limit
                    await self._wait_for_rate_limit()

                    async with session.get(self.base_url, headers=headers, params=params) as response:
                        if response.status == 200:
                            data = await response.json()
                            return await self._parse_results(data, session)
                        else:
                            logger.error(f"Scopus API request failed with status code: {response.status}")
                            return []
                except Exception as e:
                    logger.error(f"Error occurred while making Scopus API request: {str(e)}")
                    return []

    async def _wait_for_rate_limit(self):
        """Block until another request may be issued.

        The deque (maxlen=6) keeps the timestamps of the most recent
        requests; a new request proceeds only once the oldest tracked
        request is at least one second old.
        """
        while True:
            current_time = time.time()
            if not self.request_times or current_time - self.request_times[0] >= 1:
                self.request_times.append(current_time)
                break
            else:
                await asyncio.sleep(0.2)

    async def _parse_results(self, data: dict, session: aiohttp.ClientSession) -> List[Paper]:
        """Convert a Scopus response into Papers, scraping full text per DOI."""
        results = []
        scraper = UnifiedWebScraper(session)
        for entry in data.get("search-results", {}).get("entry", []):
            try:
                year = int(entry.get("prism:coverDate", "").split("-")[0])
            except (ValueError, IndexError):
                year = None
                logger.warning(f"Failed to parse year from coverDate: {entry.get('prism:coverDate')}")

            try:
                citation_count = int(entry.get("citedby-count", 0))
            except ValueError:
                citation_count = 0
                logger.warning(f"Failed to parse citation count: {entry.get('citedby-count')}")

            # `or []` also covers an explicit null "author" field, not just a
            # missing key; .get("authname", "") tolerates malformed entries.
            authors = [author.get("authname", "") for author in (entry.get("author") or [])]

            result = Paper(
                doi=entry.get("prism:doi", ""),
                title=entry.get("dc:title", ""),
                authors=authors,
                year=year,
                abstract=entry.get("dc:description", ""),
                source=entry.get("prism:publicationName", ""),
                metadata={
                    "citation_count": citation_count,
                    "scopus_id": entry.get("dc:identifier", ""),
                    "eid": entry.get("eid", "")
                }
            )
            try:
                if result.doi:
                    result.full_text = await scraper.scrape(f"https://doi.org/{result.doi}")
                # NOTE(review): this Paper is built without pdf_link, so this
                # branch relies on Paper providing a default — confirm.
                if not result.full_text and result.pdf_link:
                    result.full_text = await scraper.scrape(result.pdf_link)
            except Exception as e:
                logger.error(f"Error scraping full text for {result.doi or result.pdf_link}: {str(e)}")
            results.append(result)
        await scraper.close()
        return results
        </content>
    </file>
    <file>
        <name>__init__.py</name>
        <path>academic_claim_analyzer\search\__init__.py</path>
        <content>
# academic_claim_analyzer/search/__init__.py


from .openalex_search import OpenAlexSearch 
from .scopus_search import ScopusSearch
from .core_search import CORESearch
from .base import BaseSearch
        </content>
    </file>
        </directory>
    <directory name="academic_claim_analyzer.egg-info">
    <file>
        <name>dependency_links.txt</name>
        <path>academic_claim_analyzer.egg-info\dependency_links.txt</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>PKG-INFO</name>
        <path>academic_claim_analyzer.egg-info\PKG-INFO</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>requires.txt</name>
        <path>academic_claim_analyzer.egg-info\requires.txt</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>SOURCES.txt</name>
        <path>academic_claim_analyzer.egg-info\SOURCES.txt</path>
        <content>Full content not provided</content>
    </file>
    <file>
        <name>top_level.txt</name>
        <path>academic_claim_analyzer.egg-info\top_level.txt</path>
        <content>Full content not provided</content>
    </file>
    </directory>
    <directory name="tests">
    <file>
        <name>test_claim_analysis_e2e.py</name>
        <path>tests\test_claim_analysis_e2e.py</path>
        <content>
# tests\test_claim_analysis_e2e.py



        </content>
    </file>
    <file>
        <name>test_main.py</name>
        <path>tests\test_main.py</path>
        <content>
# tests/test_main.py

import pytest
from unittest.mock import patch, MagicMock
from academic_claim_analyzer.main import analyze_claim
from academic_claim_analyzer.models import ClaimAnalysis, Paper, RankedPaper

# Fixture: canned search results (two realistic Paper literals — a 2023
# diabetes meta-analysis and a 2021 cardiovascular cohort study) consumed
# by the analyze_claim tests below.
@pytest.fixture
def mock_search_results():
    return [
        Paper(
            doi="10.1016/j.diabres.2023.03.050",
            title="Coffee consumption and risk of type 2 diabetes: An updated meta-analysis of prospective cohort studies",
            authors=["Jiali Zheng", "Jingjing Zhang", "Yong Zhou"],
            year=2023,
            abstract="This meta-analysis of prospective cohort studies suggests that coffee consumption is associated with a reduced risk of type 2 diabetes, with the strongest effect observed for 3-4 cups per day.",
            source="Diabetes Research and Clinical Practice"
        ),
        Paper(
            doi="10.2337/dc20-1800",
            title="Long-term coffee consumption, caffeine metabolism genetics, and risk of cardiovascular disease: a prospective analysis of up to 347,077 individuals and 8368 cases",
            authors=["Licia Iacoviello", "Marialaura Bonaccio", "Augusto Di Castelnuovo"],
            year=2021,
            abstract="This large prospective study suggests that coffee consumption is associated with a lower risk of cardiovascular disease, with the relationship influenced by caffeine metabolism genetics.",
            source="Diabetes Care"
        )
    ]

# Fixture: the same two papers as mock_search_results, but as RankedPaper
# literals carrying relevance scores, quotes, and analysis text — the shape
# rank_papers is expected to produce.
@pytest.fixture
def mock_ranked_papers():
    return [
        RankedPaper(
            doi="10.1016/j.diabres.2023.03.050",
            title="Coffee consumption and risk of type 2 diabetes: An updated meta-analysis of prospective cohort studies",
            authors=["Jiali Zheng", "Jingjing Zhang", "Yong Zhou"],
            year=2023,
            abstract="This meta-analysis of prospective cohort studies suggests that coffee consumption is associated with a reduced risk of type 2 diabetes, with the strongest effect observed for 3-4 cups per day.",
            source="Diabetes Research and Clinical Practice",
            relevance_score=0.95,
            relevant_quotes=["coffee consumption is associated with a reduced risk of type 2 diabetes"],
            analysis="This meta-analysis provides strong evidence supporting the claim that coffee consumption is associated with reduced risk of type 2 diabetes."
        ),
        RankedPaper(
            doi="10.2337/dc20-1800",
            title="Long-term coffee consumption, caffeine metabolism genetics, and risk of cardiovascular disease: a prospective analysis of up to 347,077 individuals and 8368 cases",

            authors=["Licia Iacoviello", "Marialaura Bonaccio", "Augusto Di Castelnuovo"],
            year=2021,
            abstract="This large prospective study suggests that coffee consumption is associated with a lower risk of cardiovascular disease, with the relationship influenced by caffeine metabolism genetics.",
            source="Diabetes Care",
            relevance_score=0.75,
            relevant_quotes=["coffee consumption is associated with a lower risk of cardiovascular disease"],
            analysis="While this study focuses on cardiovascular disease, it provides indirect support for the potential health benefits of coffee consumption, which may be relevant to the claim about type 2 diabetes risk reduction."
        )
    ]

@pytest.mark.asyncio
async def test_analyze_claim(mock_search_results, mock_ranked_papers):
    """Happy path: queries are formulated, searched, scraped, and ranked."""
    # Patch targets must match the importable package name
    # (`academic_claim_analyzer`, as imported at the top of this file);
    # the previous 'src.'-prefixed paths made patch() raise
    # ModuleNotFoundError because no 'src' package is on sys.path.
    with patch('academic_claim_analyzer.query_formulator.formulate_queries') as mock_formulate_queries, \
         patch('academic_claim_analyzer.search.OpenAlexSearch.search') as mock_search, \
         patch('academic_claim_analyzer.paper_scraper.scrape_papers') as mock_scrape_papers, \
         patch('academic_claim_analyzer.paper_ranker.rank_papers') as mock_rank_papers:

        mock_formulate_queries.return_value = [
            "TITLE-ABS-KEY(coffee AND consumption AND (\"type 2 diabetes\" OR \"diabetes mellitus\") AND risk)",
            "TITLE-ABS-KEY(\"coffee intake\" AND \"diabetes risk\" AND (meta-analysis OR \"systematic review\"))"
        ]
        mock_search.return_value = mock_search_results
        mock_scrape_papers.return_value = mock_search_results
        mock_rank_papers.return_value = mock_ranked_papers

        claim = "Coffee consumption is associated with reduced risk of type 2 diabetes."
        result = await analyze_claim(claim, num_queries=2, papers_per_query=2, num_papers_to_return=2)

        assert isinstance(result, ClaimAnalysis)
        assert result.claim == claim
        assert len(result.queries) == 2
        assert len(result.search_results) == 4  # 2 queries * 2 papers per query
        assert len(result.ranked_papers) == 2

        top_papers = result.get_top_papers(2)
        assert len(top_papers) == 2
        assert top_papers[0].relevance_score >= top_papers[1].relevance_score
        assert "meta-analysis" in top_papers[0].title.lower()
        assert "type 2 diabetes" in top_papers[0].title.lower()

@pytest.mark.asyncio
async def test_analyze_claim_error_handling():
    """A failure in query formulation is captured in result.metadata."""
    # Patch target corrected from 'src.academic_claim_analyzer...' to the
    # actual importable package name used at the top of this file.
    with patch('academic_claim_analyzer.query_formulator.formulate_queries', side_effect=Exception("API rate limit exceeded")):
        claim = "Mindfulness meditation can help reduce symptoms of anxiety and depression."
        result = await analyze_claim(claim)
        assert isinstance(result, ClaimAnalysis)
        assert result.claim == claim
        assert "error" in result.metadata
        assert result.metadata["error"] == "API rate limit exceeded"
        assert len(result.queries) == 0
        assert len(result.search_results) == 0
        assert len(result.ranked_papers) == 0

@pytest.mark.asyncio
async def test_analyze_claim_no_results():
    """When the search yields nothing, the analysis reports no papers."""
    # Patch targets corrected from 'src.academic_claim_analyzer...' to the
    # actual importable package name used at the top of this file.
    with patch('academic_claim_analyzer.query_formulator.formulate_queries') as mock_formulate_queries, \
         patch('academic_claim_analyzer.search.OpenAlexSearch.search') as mock_search:

        mock_formulate_queries.return_value = ["TITLE-ABS-KEY(non_existent_topic AND improbable_research)"]
        mock_search.return_value = []

        claim = "Non-existent topic is related to improbable research outcomes."
        result = await analyze_claim(claim, num_queries=1, papers_per_query=5, num_papers_to_return=2)

        assert isinstance(result, ClaimAnalysis)
        assert result.claim == claim
        assert len(result.queries) == 1
        assert len(result.search_results) == 0
        assert len(result.ranked_papers) == 0
        assert "No relevant papers found" in result.metadata.get("analysis", "")

@pytest.mark.asyncio
async def test_analyze_claim_partial_results(mock_search_results, mock_ranked_papers):
    """One query succeeds and one returns nothing; partial results survive.

    Fixes two defects: the fixtures `mock_search_results` and
    `mock_ranked_papers` were referenced without being declared as
    parameters (NameError at runtime), and the patch targets used a
    'src.'-prefixed module path that does not exist on sys.path.
    """
    with patch('academic_claim_analyzer.query_formulator.formulate_queries') as mock_formulate_queries, \
         patch('academic_claim_analyzer.search.OpenAlexSearch.search') as mock_search, \
         patch('academic_claim_analyzer.paper_scraper.scrape_papers') as mock_scrape_papers, \
         patch('academic_claim_analyzer.paper_ranker.rank_papers') as mock_rank_papers:

        mock_formulate_queries.return_value = [
            "TITLE-ABS-KEY(exercise AND \"cardiovascular health\" AND \"older adults\")",
            "TITLE-ABS-KEY(\"physical activity\" AND \"heart disease\" AND elderly)"
        ]
        mock_search.side_effect = [mock_search_results[:1], []]  # First query returns one result, second query returns no results
        mock_scrape_papers.return_value = mock_search_results[:1]
        mock_rank_papers.return_value = mock_ranked_papers[:1]

        claim = "Regular exercise is linked to improved cardiovascular health in older adults."
        result = await analyze_claim(claim, num_queries=2, papers_per_query=2, num_papers_to_return=2)

        assert isinstance(result, ClaimAnalysis)
        assert result.claim == claim
        assert len(result.queries) == 2
        assert len(result.search_results) == 1
        assert len(result.ranked_papers) == 1
        assert "Partial results found" in result.metadata.get("analysis", "")

# Allow running this module directly (python test_main.py) outside pytest's
# own collection.
if __name__ == "__main__":
    pytest.main()
        </content>
    </file>
    <file>
        <name>test_paper_ranker.py</name>
        <path>tests\test_paper_ranker.py</path>
        <content>
# tests\test_paper_ranker.py



        </content>
    </file>
    <file>
        <name>test_paper_scraper.py</name>
        <path>tests\test_paper_scraper.py</path>
        <content>
# tests\test_paper_scraper.py



        </content>
    </file>
    <file>
        <name>test_query_formulator.py</name>
        <path>tests\test_query_formulator.py</path>
        <content>
# tests/test_query_formulator.py

import pytest
from academic_claim_analyzer.query_formulator import formulate_queries
from unittest.mock import patch 

@pytest.mark.asyncio
@pytest.mark.parametrize("claim, num_queries, query_type", [
    (
        "Coffee consumption is associated with reduced risk of type 2 diabetes.",
        3,
        "scopus"
    ),
    (
        "Mindfulness meditation can help reduce symptoms of anxiety and depression.",
        4,
        "openalex"
    ),
    (
        "Regular exercise is linked to improved cardiovascular health in older adults.",
        5,
        "scopus"
    ),
])
async def test_formulate_queries(claim, num_queries, query_type):
    """formulate_queries returns exactly num_queries non-empty strings,
    each shaped for the requested backend.

    (Removed the leftover debug print that its own comment flagged as
    temporary.)
    """
    queries = await formulate_queries(claim, num_queries, query_type)
    assert isinstance(queries, list)
    assert len(queries) == num_queries
    for query in queries:
        assert isinstance(query, str)
        assert len(query) > 0

        # Each backend has its own expected query shape.
        if query_type.lower() == 'scopus':
            assert query.startswith("TITLE-ABS-KEY(") and query.endswith(")")
        elif query_type.lower() == 'openalex':
            assert query.startswith("https://api.openalex.org/works?search=")

@pytest.mark.asyncio
async def test_formulate_queries_invalid_query_type():
    """An unrecognized query type must raise ValueError."""
    claim, count, bad_type = "Test claim", 3, "invalid_type"
    with pytest.raises(ValueError, match="Unsupported query type"):
        await formulate_queries(claim, count, bad_type)
        </content>
    </file>
    <file>
        <name>test_scrape_rank_integration.py</name>
        <path>tests\test_scrape_rank_integration.py</path>
        <content>
# tests\test_scrape_rank_integration.py



        </content>
    </file>
    <file>
        <name>test_search_integration.py</name>
        <path>tests\test_search_integration.py</path>
        <content>
# tests\test_search_integration.py



        </content>
    </file>
    </directory>
        <directory name="test_search">
    <file>
        <name>test_core.py</name>
        <path>tests\test_search\test_core.py</path>
        <content>
# tests/test_search/test_core.py

import pytest
from unittest.mock import patch, MagicMock

from academic_claim_analyzer.search.core_search import CORESearch
from academic_claim_analyzer.models import Paper

@pytest.fixture
def mock_core_response():
    """Canned CORE /search/works payload containing a single work entry."""
    work_entry = {
        "doi": "10.1161/CIRCULATIONAHA.120.050775",
        "title": "Physical Activity and Cardiovascular Health in Older Adults: A Comprehensive Review",
        "authors": [
            {"name": "Jennifer L. Carter"},
            {"name": "Robert A. Thompson"},
            {"name": "Lisa M. Brown"}
        ],
        "publicationYear": 2021,
        "abstract": "This comprehensive review examines the relationship between regular physical activity and cardiovascular health in older adults. The evidence strongly supports that engagement in regular exercise is associated with improved cardiovascular outcomes, including reduced risk of heart disease, stroke, and mortality.",
        "downloadUrl": "https://www.ahajournals.org/doi/pdf/10.1161/CIRCULATIONAHA.120.050775",
        "publisher": "Circulation",
        "fullText": "This is the full text of the paper, including detailed methods, results, and discussion...",
        "citationCount": 45,
        "id": "core:98765432"
    }
    return {"results": [work_entry]}

@pytest.mark.asyncio
async def test_core_search(mock_core_response):
    """CORESearch.search parses a successful CORE response into Papers."""
    from unittest.mock import AsyncMock  # file header only imports patch/MagicMock

    with patch('aiohttp.ClientSession.post') as mock_post:
        mock_post.return_value.__aenter__.return_value.status = 200
        # The client code does `await response.json()`. With a plain
        # MagicMock, json() returns the dict itself and awaiting a dict
        # raises TypeError — so json must be an AsyncMock.
        mock_post.return_value.__aenter__.return_value.json = AsyncMock(return_value=mock_core_response)

        with patch.dict('os.environ', {'CORE_API_KEY': 'fake_api_key'}):
            search = CORESearch()
            results = await search.search("physical activity cardiovascular health older adults", 1)

            assert len(results) == 1
            paper = results[0]
            assert isinstance(paper, Paper)
            assert paper.doi == "10.1161/CIRCULATIONAHA.120.050775"
            assert paper.title == "Physical Activity and Cardiovascular Health in Older Adults: A Comprehensive Review"
            assert paper.authors == ["Jennifer L. Carter", "Robert A. Thompson", "Lisa M. Brown"]
            assert paper.year == 2021
            assert "relationship between regular physical activity and cardiovascular health in older adults" in paper.abstract
            assert paper.pdf_link == "https://www.ahajournals.org/doi/pdf/10.1161/CIRCULATIONAHA.120.050775"
            assert paper.source == "Circulation"
            assert "This is the full text of the paper" in paper.full_text
            assert paper.metadata["citation_count"] == 45
            assert paper.metadata["core_id"] == "core:98765432"

@pytest.mark.asyncio
async def test_core_search_error():
    """A non-200 CORE response yields an empty result list."""
    with patch.dict('os.environ', {'CORE_API_KEY': 'fake_api_key'}):
        with patch('aiohttp.ClientSession.post') as mock_post:
            mock_post.return_value.__aenter__.return_value.status = 500
            search = CORESearch()
            results = await search.search("physical activity cardiovascular health older adults", 1)

    assert len(results) == 0
        </content>
    </file>
    <file>
        <name>test_openalex.py</name>
        <path>tests\test_search\test_openalex.py</path>
        <content>
# tests/test_search/test_openalex.py

import pytest
from unittest.mock import patch, MagicMock
from academic_claim_analyzer.search.openalex_search import OpenAlexSearch
from academic_claim_analyzer.models import Paper

# Fixture: canned OpenAlex /works payload with one well-formed work,
# including the nested authorships and primary_location structures the
# parser reads.
@pytest.fixture
def mock_openalex_response():
    return {
        "results": [
            {
                "doi": "10.1016/j.diabres.2023.03.050",
                "title": "Coffee consumption and risk of type 2 diabetes: An updated meta-analysis of prospective cohort studies",
                "authorships": [
                    {"author": {"display_name": "Jiali Zheng"}},
                    {"author": {"display_name": "Jingjing Zhang"}},
                    {"author": {"display_name": "Yong Zhou"}}
                ],
                "publication_year": 2023,
                "abstract": "This meta-analysis of prospective cohort studies suggests that coffee consumption is associated with a reduced risk of type 2 diabetes, with the strongest effect observed for 3-4 cups per day.",
                "primary_location": {
                    "pdf_url": "https://www.sciencedirect.com/science/article/pii/S0168822723001512/pdf",
                    "source": {"display_name": "Diabetes Research and Clinical Practice"}
                },
                "cited_by_count": 12,
                "id": "W4235689012"
            }
        ]
    }

@pytest.mark.asyncio
async def test_openalex_search(mock_openalex_response):
    """OpenAlexSearch.search parses a successful API payload into a Paper."""
    from unittest.mock import AsyncMock  # file header only imports patch/MagicMock

    with patch('aiohttp.ClientSession.get') as mock_get:
        mock_get.return_value.__aenter__.return_value.status = 200
        # The client code does `await response.json()`; a plain MagicMock
        # would hand back the dict and awaiting a dict raises TypeError.
        # NOTE(review): this same mocked get() also serves the full-text
        # scraper's requests inside _parse_results — confirm the scraper
        # tolerates that.
        mock_get.return_value.__aenter__.return_value.json = AsyncMock(return_value=mock_openalex_response)

        search = OpenAlexSearch(email="researcher@university.edu")
        results = await search.search("coffee consumption type 2 diabetes", 1)

        assert len(results) == 1
        paper = results[0]
        assert isinstance(paper, Paper)
        assert paper.doi == "10.1016/j.diabres.2023.03.050"
        assert paper.title == "Coffee consumption and risk of type 2 diabetes: An updated meta-analysis of prospective cohort studies"
        assert paper.authors == ["Jiali Zheng", "Jingjing Zhang", "Yong Zhou"]
        assert paper.year == 2023
        assert "coffee consumption is associated with a reduced risk of type 2 diabetes" in paper.abstract
        assert paper.pdf_link == "https://www.sciencedirect.com/science/article/pii/S0168822723001512/pdf"
        assert paper.source == "Diabetes Research and Clinical Practice"
        assert paper.metadata["citation_count"] == 12
        assert paper.metadata["openalex_id"] == "W4235689012"

@pytest.mark.asyncio
async def test_openalex_search_error():
    """A non-200 OpenAlex response yields an empty result list."""
    with patch('aiohttp.ClientSession.get') as mock_get:
        mock_get.return_value.__aenter__.return_value.status = 500
        searcher = OpenAlexSearch(email="researcher@university.edu")
        results = await searcher.search("coffee consumption type 2 diabetes", 1)

    assert len(results) == 0
        </content>
    </file>
    <file>
        <name>test_scopus.py</name>
        <path>tests\test_search\test_scopus.py</path>
        <content>
# tests/test_search/test_scopus.py

import pytest
from unittest.mock import patch, MagicMock
from academic_claim_analyzer.search.scopus_search import ScopusSearch
from academic_claim_analyzer.models import Paper

# Fixture: canned Scopus search payload with one entry, exercising the
# prism:/dc: field names and the string-typed citedby-count the parser
# must coerce to int.
@pytest.fixture
def mock_scopus_response():
    return {
        "search-results": {
            "entry": [
                {
                    "prism:doi": "10.1016/j.jad.2022.01.053",
                    "dc:title": "Mindfulness-based interventions for anxiety and depression in adults: A meta-analysis of randomized controlled trials",
                    "author": [
                        {"authname": "Sarah J. Goldberg"},
                        {"authname": "Michael A. Smith"},
                        {"authname": "Emily R. Johnson"}
                    ],
                    "prism:coverDate": "2022-04-15",
                    "dc:description": "This meta-analysis provides evidence that mindfulness-based interventions are effective in reducing symptoms of anxiety and depression in adults, with moderate effect sizes observed across various clinical and non-clinical populations.",
                    "prism:publicationName": "Journal of Affective Disorders",
                    "citedby-count": "87",
                    "dc:identifier": "SCOPUS_ID:85123456789",
                    "eid": "2-s2.0-85123456789"
                }
            ]
        }
    }

@pytest.mark.asyncio
async def test_scopus_search(mock_scopus_response):
    """ScopusSearch.search parses a successful API payload into a Paper."""
    from unittest.mock import AsyncMock  # file header only imports patch/MagicMock

    with patch('aiohttp.ClientSession.get') as mock_get:
        mock_get.return_value.__aenter__.return_value.status = 200
        # The client code does `await response.json()`; a plain MagicMock
        # would hand back the dict and awaiting a dict raises TypeError.
        mock_get.return_value.__aenter__.return_value.json = AsyncMock(return_value=mock_scopus_response)

        with patch.dict('os.environ', {'SCOPUS_API_KEY': 'fake_api_key'}):
            search = ScopusSearch()
            results = await search.search("mindfulness anxiety depression meta-analysis", 1)

            assert len(results) == 1
            paper = results[0]
            assert isinstance(paper, Paper)
            assert paper.doi == "10.1016/j.jad.2022.01.053"
            assert paper.title == "Mindfulness-based interventions for anxiety and depression in adults: A meta-analysis of randomized controlled trials"
            assert paper.authors == ["Sarah J. Goldberg", "Michael A. Smith", "Emily R. Johnson"]
            assert paper.year == 2022
            assert "mindfulness-based interventions are effective in reducing symptoms of anxiety and depression" in paper.abstract
            assert paper.source == "Journal of Affective Disorders"
            assert paper.metadata["citation_count"] == 87
            assert paper.metadata["scopus_id"] == "SCOPUS_ID:85123456789"
            assert paper.metadata["eid"] == "2-s2.0-85123456789"

@pytest.mark.asyncio
async def test_scopus_search_error():
    """An HTTP 500 from the Scopus API must produce an empty result list."""
    with patch('aiohttp.ClientSession.get') as mocked_get:
        # Only the status matters on the failure path.
        mocked_get.return_value.__aenter__.return_value.status = 500

        with patch.dict('os.environ', {'SCOPUS_API_KEY': 'fake_api_key'}):
            searcher = ScopusSearch()
            found = await searcher.search("mindfulness anxiety depression meta-analysis", 1)

            assert not found
        </content>
    </file>
        </directory>
    <file>
        <name>LICENSE</name>
        <path>LICENSE</path>
        <content>

        </content>
    </file>
    <file>
        <name>README.md</name>
        <path>README.md</path>
        <content>

        </content>
    </file>
    <file>
        <name>requirements.txt</name>
        <path>requirements.txt</path>
        <content>
# requirements.txt

aiohttp
anthropic
google-generativeai
openai
python-dotenv
tiktoken
# NOTE: "asyncio" intentionally not pinned here — it is part of the Python
# standard library; the PyPI package of the same name is an obsolete backport
# that breaks installs on Python 3.10+.
# asyncio
pytest
pytest-asyncio
beautifulsoup4
PyMuPDF
playwright==1.36.0
fake-useragent
async-llm-handler
        </content>
    </file>
    <file>
        <name>LICENSE</name>
        <path>.eggs\setuptools_scm-8.1.0-py3.11.egg\EGG-INFO\LICENSE</path>
        <content>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

        </content>
    </file>
    <file>
        <name>README.md</name>
        <path>.pytest_cache\README.md</path>
        <content>
# pytest cache directory #

This directory contains data from the pytest's cache plugin,
which provides the `--lf` and `--ff` options, as well as the `cache` fixture.

**Do not** commit this to version control.

See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.

        </content>
    </file>
</repository_structure>
