import os
import subprocess
import pandas as pd
from tqdm import tqdm
import sys
import gc
import psutil
import argparse


# Path to the CSV file where results will be stored
RESULTS_CSV = "eda/mutpy_results.csv"

# Make sure the results file exists, with its header row, before any run starts.
if not os.path.exists(RESULTS_CSV):
    # DataFrame.to_csv does not create missing parent directories, so create
    # the output folder first instead of failing when it is absent.
    results_dir = os.path.dirname(RESULTS_CSV)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)
    results_df = pd.DataFrame(columns=["Test Name", "Target Name", "Run Result"])
    results_df.to_csv(RESULTS_CSV, index=False)


results_buffer = []  # Collect results in memory

def append_to_csv(test_name, target_name, run_result):
    """
    Queue one result row in the in-memory buffer.

    Despite the name, nothing is written to disk here; the row is only
    appended to ``results_buffer``.

    :param test_name: Value stored under the "Test Name" column.
    :param target_name: Value stored under the "Target Name" column.
    :param run_result: Value stored under the "Run Result" column.
    """
    row = {
        "Test Name": test_name,
        "Target Name": target_name,
        "Run Result": run_result,
    }
    results_buffer.append(row)

def flush_results_to_csv():
    """
    Write all buffered result rows to ``RESULTS_CSV`` and empty the buffer.

    Rows are appended to the file rather than re-reading and rewriting the
    whole CSV on every flush, so the cost of a flush depends only on the
    number of buffered rows and earlier rows are never rewritten. Does
    nothing when the buffer is empty.
    """
    if not results_buffer:
        return

    new_rows = pd.DataFrame(
        results_buffer,
        columns=["Test Name", "Target Name", "Run Result"],
    )
    # The header is only needed when the file is being started from scratch.
    needs_header = (
        not os.path.exists(RESULTS_CSV) or os.path.getsize(RESULTS_CSV) == 0
    )
    new_rows.to_csv(RESULTS_CSV, mode="a", header=needs_header, index=False)
    results_buffer.clear()  # Clear the buffer

def _captured_text(data):
    """Return captured subprocess output as ``str`` (``None``/empty becomes ``""``)."""
    if not data:
        return ""
    if isinstance(data, bytes):
        # TimeoutExpired may carry raw bytes even when text=True was requested.
        import locale  # local import: only needed on the timeout path

        return data.decode(locale.getpreferredencoding(False), errors="replace")
    return data


def run_mutpy_on_tests(implementation_file, test_files_folder, additional_args, timeout_per_file):
    """
    Runs mut.py on all test files in the specified folder against the given implementation file.

    For every ``test_*.py`` file in the folder (processed in sorted order) the
    command output is written to ``<test name>_mutpy_output.txt`` in the same
    folder and the exit status (or ``"TIMEOUT"``) is queued via append_to_csv.

    :param implementation_file: Path to the implementation file (target).
    :param test_files_folder: Path to the folder containing test files.
    :param additional_args: Iterable of additional arguments to pass to mut.py (optional, may be None).
    :param timeout_per_file: Timeout in seconds for each mut.py subprocess; None disables the timeout.
    """
    # Ensure additional_args is a list
    if additional_args is None:
        additional_args = []

    # Get all test files in the folder; sorted so runs are reproducible
    test_files = sorted(
        os.path.join(test_files_folder, f)
        for f in os.listdir(test_files_folder)
        if f.startswith("test_") and f.endswith(".py")
    )

    target_name = os.path.basename(implementation_file)

    # Run mut.py for each test file
    for test_file in test_files:
        print(f"Running mut.py on {test_file}...")
        command = [
            "mut.py",
            "--target", implementation_file,
            "--unit-test", test_file,
            *additional_args,
        ]

        # Save the output to a file in the task folder
        test_file_name = os.path.splitext(os.path.basename(test_file))[0]
        output_file = os.path.join(test_files_folder, f"{test_file_name}_mutpy_output.txt")

        # Report system load before launching; cpu_percent blocks for the sampling interval
        cpu_load = psutil.cpu_percent(interval=4)
        ram_load = psutil.virtual_memory().percent
        print("CPU Load: {}%".format(cpu_load))
        print("RAM Load: {}%".format(ram_load))

        try:
            # Execute the command, honouring the caller-supplied timeout
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=timeout_per_file,
                close_fds=True,
            )
        except subprocess.TimeoutExpired as e:
            # Keep whatever output was captured before the process was killed
            stdout = _captured_text(e.stdout)
            stderr = _captured_text(e.stderr)
            run_result = "TIMEOUT"
            print(f"Timeout occurred for {test_file}. Saving partial output.")
        else:
            stdout = result.stdout
            stderr = result.stderr
            run_result = result.returncode  # Exit status code

        # Write the output to the file
        with open(output_file, "w") as f:
            f.write(f"Command: {' '.join(command)}\n\n")
            f.write(f"Output:\n{stdout}\n")
            if stderr:
                f.write(f"\nErrors:\n{stderr}\n")

        print(f"Output saved to {output_file}. Program exited with code {run_result}")

        # Queue the result for the CSV file
        append_to_csv(test_name=test_file_name, target_name=target_name, run_result=run_result)


def process_all_tasks(base_folder, additional_args, timeout_per_file):
    """
    Walks every entry of the base folder and runs mutation testing on each task directory.

    Inside a task directory, the first listed ``.py`` file whose name does not
    start with ``test_`` is taken as the implementation file. Buffered results
    are flushed to the CSV after every entry of the base folder.

    :param base_folder: Path to the base folder containing task directories.
    :param additional_args: List of additional arguments to pass to mut.py (optional).
    :param timeout_per_file: Passed through unchanged to run_mutpy_on_tests.
    """
    entries = os.listdir(base_folder)
    for entry in tqdm(entries, desc="Running mut.py on test files", unit="file"):
        print(flush=True)
        task_path = os.path.join(base_folder, entry)

        if os.path.isdir(task_path):
            print(f"Processing task: {entry}")

            # Candidate implementation files: any .py file that is not a test file
            non_test_sources = (
                os.path.join(task_path, name)
                for name in os.listdir(task_path)
                if name.endswith(".py") and not name.startswith("test_")
            )
            implementation_file = next(non_test_sources, None)

            if implementation_file is None:
                print(f"No implementation file found in {task_path}")
            else:
                print(f"Found implementation file: {implementation_file}")
                run_mutpy_on_tests(implementation_file, task_path, additional_args, timeout_per_file)

        # Persist whatever was collected for this entry
        flush_results_to_csv()

        # python script takes too much resources
        gc.collect()


# Command-line entry point: parse the options and process every task folder
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Run mutation testing on all tasks in a base folder.")
    cli.add_argument(
        "--base_folder",
        default="reference_data/docs_python",
        type=str,
        help="Path to the base folder containing task directories (default: reference_data/docs_python)",
    )
    cli.add_argument(
        "--theta",
        default="0.95",
        type=str,
        help="Theta parameter for mut.py (default: 0.95)",
    )
    cli.add_argument(
        "--timeout",
        default=120,
        type=int,
        help="Timeout (in seconds) for each mut.py subprocess (default: 120)",
    )
    options = cli.parse_args()

    base_folder, theta, timeout = options.base_folder, options.theta, options.timeout

    # Fixed mut.py options, followed by the user-supplied theta
    additional_args = ["-m"]
    additional_args += ["--hom-strategy", "BETWEEN_OPERATORS"]
    additional_args += ["--order", "2"]
    additional_args += ["--eda-folder", "./eda"]
    additional_args += ["-f", "10"]
    additional_args += ["--theta", theta]

    process_all_tasks(base_folder, additional_args, timeout_per_file=timeout)