Source code for geochemistrypi.cli

# -*- coding: utf-8 -*-
import os
import platform
import subprocess
import threading
from typing import Optional

import typer

from ._version import __version__
from .data_mining.cli_pipeline import cli_pipeline
from .data_mining.constants import WORKING_PATH

# Root Typer application; the callback and commands in this module register on it
# through the @app.callback() / @app.command() decorators.
app = typer.Typer()

# Directory containing this module (realpath resolves symlinks first).
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
# "frontend" entry next to this module; the yarn commands are run from here.
FRONTEND_PATH = os.path.join(CURRENT_PATH, "frontend")
# Script that is launched with the Python interpreter to start the web backend.
BACKEND_PATH = os.path.join(CURRENT_PATH, "start_dash_pipeline.py")
# Path to start_cli_pipeline.py next to this module (not referenced in the visible part of this file).
PIPELINE_PATH = os.path.join(CURRENT_PATH, "start_cli_pipeline.py")
# Value handed to `mlflow ui --backend-store-uri`: WORKING_PATH prefixed with "file:"
# and joined with "geopi_tracking" using the OS path separator.
MLFLOW_STORE_PATH = os.path.join(f"file:{WORKING_PATH}", "geopi_tracking")


def _version_callback(value: bool) -> None:
    """Show Geochemistry Pi version."""
    if value:
        typer.echo(f"Geochemistry π {__version__}")
        raise typer.Exit()


@app.callback()
def main(
    version: Optional[bool] = typer.Option(
        None,
        "--version",
        "-v",
        help="Show version.",
        callback=_version_callback,
        is_eager=True,
    )
) -> None:
    """
    Geochemistry π is an open-source, highly automated machine learning Python framework for data-driven geochemistry discovery.
    It has the core components of continuous training, machine learning lifecycle management and model serving.
    """
    # Typer uses the docstring above as the application's help text, so it describes
    # the tool rather than this function and is deliberately kept free of an
    # Args/Returns section.
    #
    # `version` is consumed by _version_callback, which echoes the version and raises
    # typer.Exit when the flag is given; the callback body itself has nothing to do.
@app.command()
def data_mining(
    data: str = typer.Option("", help="The path of the training data without model inference."),
    training: str = typer.Option("", help="The path of the training data."),
    inference: str = typer.Option("", help="The path of the inference data."),
    mlflow: bool = typer.Option(False, help="Start the mlflow server."),
    web: bool = False,
) -> None:
    """Implement the customized automated machine learning pipeline for geochemistry data mining."""
    # The docstring above is shown as this command's help text, so the details live in
    # comments instead.
    #
    # Mode selection (first match wins):
    #   web       -> run the backend script and the yarn frontend side by side;
    #   mlflow    -> run the mlflow UI against MLFLOW_STORE_PATH;
    #   data      -> cli_pipeline(data);
    #   otherwise -> cli_pipeline(training, inference), including when both are empty.
    #
    # Function-scope import so this block does not depend on a file-level `import sys`.
    import sys

    def start_backend() -> None:
        """Start the backend server."""
        # Argument list instead of a shell string: BACKEND_PATH may contain spaces.
        # sys.executable guarantees the interpreter running this CLI is used, rather
        # than whatever `python` happens to be (or not be) on PATH.
        subprocess.run([sys.executable, BACKEND_PATH])

    def start_frontend() -> None:
        """Start the frontend server."""
        # cwd= replaces `cd <path> && ...`: it is immune to spaces in the path and, on
        # Windows, to the frontend living on a different drive than the current one.
        # The constant command still goes through the shell so that the yarn launcher
        # script is resolved the same way as before.
        subprocess.run("yarn start", shell=True, cwd=FRONTEND_PATH)

    def start_mlflow() -> None:
        """Start the mlflow server."""
        # Passed as separate arguments so a store path with spaces stays one argument.
        subprocess.run(["mlflow", "ui", "--backend-store-uri", MLFLOW_STORE_PATH])

    if web:
        # Start the backend and frontend in parallel
        backend_thread = threading.Thread(target=start_backend)
        frontend_thread = threading.Thread(target=start_frontend)
        backend_thread.start()
        frontend_thread.start()
        # Wait for both servers to exit
        backend_thread.join()
        frontend_thread.join()
        return

    if mlflow:
        # Start mlflow server to track the experiment. It is run in the foreground:
        # nothing else executes in this mode, and a launch failure now surfaces as an
        # error instead of being lost in a never-joined thread.
        start_mlflow()
        return

    if data:
        # Training data only: continuous training without model inference.
        cli_pipeline(data)
    else:
        # Covers both "training and inference given" and "nothing given". Per the
        # original comments, empty paths make cli_pipeline use built-in data; that
        # behaviour is defined in cli_pipeline, outside this file.
        cli_pipeline(training, inference)
@app.command()
def web_setup() -> None:
    """Set up the dependencies of the web application."""
    # The docstring above is shown as this command's help text; details are kept in
    # comments. Steps: install Yarn and Node.js with the platform's package manager
    # (Chocolatey / apt-get / Homebrew), then run `yarn install` in FRONTEND_PATH.
    # On any other platform only the final `yarn install` step runs.
    my_os = platform.system()
    if my_os == "Windows":
        # Download and install Yarn on Windows using the Chocolatey package manager
        subprocess.run("choco install yarn", shell=True)
        # Download and install Node.js on Windows using the Chocolatey package manager
        subprocess.run("choco install nodejs", shell=True)
    elif my_os == "Linux":
        # Download and install Yarn on Linux using apt-get
        subprocess.run("apt-get install -y yarn", shell=True)
        # Download and install Node.js on Linux using apt-get
        subprocess.run("apt-get install -y nodejs", shell=True)
    elif my_os == "Darwin":
        # check=True is what makes a missing tool (non-zero exit status from the
        # shell) raise CalledProcessError; without it the install branches below
        # could never run and "already installed" was printed unconditionally.
        try:
            subprocess.run("node --version", shell=True, check=True)
        except subprocess.CalledProcessError:
            # Download and install Node.js on macOS using Homebrew
            subprocess.run("brew install node", shell=True)
        else:
            print("Node.js is already installed.")

        try:
            # Check if Yarn is installed
            subprocess.run("yarn --version", shell=True, check=True)
        except subprocess.CalledProcessError:
            # Download and install Yarn on macOS using Homebrew
            subprocess.run("brew install yarn", shell=True)
        else:
            print("Yarn is already installed.")

    # Install the frontend dependencies. cwd= replaces `cd <path> && ...`, so paths
    # with spaces (and, on Windows, paths on another drive) work.
    subprocess.run("yarn install", shell=True, cwd=FRONTEND_PATH)