Source code for parepy_toolbox.pare

"""Probabilistic Approach to Reliability Engineering (PAREPY)"""
import time
import os
import itertools
from datetime import datetime
from multiprocessing import Pool
from typing import Callable, Optional, List

import numpy as np
import pandas as pd

import parepy_toolbox.common_library as parepyco
import parepy_toolbox.distributions as parepydi


def deterministic_algorithm_structural_analysis(obj: Callable, tol: float, max_iter: int, random_var_settings: list, x0: list, verbose: bool = False, args: Optional[tuple] = None) -> tuple[pd.DataFrame, float, float]:
    """
    Computes the reliability index and probability of failure using FORM (First Order Reliability Method).

    :param obj: The objective function: obj(x, args) -> float or obj(x) -> float, where x is a list with shape n and args is a tuple of fixed parameters needed to completely specify the function.
    :param tol: Tolerance for convergence.
    :param max_iter: Maximum number of iterations allowed.
    :param random_var_settings: List of dictionaries containing the distribution type and parameters. Example: {'type': 'normal', 'parameters': {'mean': 0, 'std': 1}}. Supported distributions: (a) 'uniform': keys 'min' and 'max', (b) 'normal': keys 'mean' and 'std', (c) 'lognormal': keys 'mean' and 'std', (d) 'gumbel max': keys 'mean' and 'std', (e) 'gumbel min': keys 'mean' and 'std', (f) 'triangular': keys 'min', 'mode' and 'max', or (g) 'gamma': keys 'mean' and 'std'.
    :param x0: Initial guess.
    :param verbose: If True, prints detailed information about the process. Default is `False`.
    :param args: Extra arguments to pass to the objective function (optional).

    :return: Results of the reliability analysis. output[0] = Numerical data obtained during the MPP search, output[1] = probability of failure, output[2] = reliability index.

    Example
    ==============

    >>> # pip install -U parepy-toolbox
    >>> from parepy_toolbox import deterministic_algorithm_structural_analysis
    >>> def obj(x):
    ...     return 12.5 * x[0]**3 - x[1]
    >>> d = {'type': 'normal', 'parameters': {'mean': 1., 'std': 0.1}}
    >>> l = {'type': 'normal', 'parameters': {'mean': 10., 'std': 1.}}
    >>> var = [d, l]
    >>> x0 = [1.0, 10.0]
    >>> max_iter = 100
    >>> tol = 1E-8
    >>> results, pf, beta = deterministic_algorithm_structural_analysis(obj, tol, max_iter, var, x0, verbose=True)
    >>> print(f"Probability of failure: {pf}")
    >>> print(f"Reliability index (beta): {beta}")
    >>> results.head()
    """
    results = []
    x_k = x0.copy()
    error = 1 / tol
    iteration = 0
    n = len(random_var_settings)
    start_time = time.perf_counter()

    # Iteration process
    while error > tol and iteration < max_iter:
        row = {}
        mu_eq = []
        sigma_eq = []

        # Conversion Non-normal to Normal (normal tail approximation)
        for i, var in enumerate(random_var_settings):
            paras_scipy = parepydi.convert_params_to_scipy(var['type'], var['parameters'])
            m, s = parepydi.normal_tail_approximation(var['type'], paras_scipy, x_k[i])
            mu_eq.append(m)
            sigma_eq.append(s)

        # y_k: map the current design point to standard normal space
        dneq, dneq1 = parepyco.std_matrix(sigma_eq)
        mu_vars = parepyco.mu_matrix(mu_eq)
        y_k = parepyco.x_to_y(np.array(x_k).reshape(-1, 1), dneq1, mu_vars)
        beta_k = np.linalg.norm(y_k)
        for i in range(n):
            row[f"x_{i},k"] = x_k[i]
            row[f"y_{i},k"] = y_k[i, 0]
        row["β_k"] = beta_k

        # Numerical differentiation g(x) and g(y)
        g_diff_x = parepyco.jacobian_matrix(obj, x_k, 'center', h=1E-8, args=args) if args is not None else parepyco.jacobian_matrix(obj, x_k, 'center', h=1E-8)
        g_diff_y = np.matrix_transpose(dneq) @ g_diff_x

        # alpha vector (unit gradient in standard normal space)
        norm_gdiff = np.linalg.norm(g_diff_y)
        alpha = g_diff_y / norm_gdiff
        for i in range(n):
            row[f"α_{i},k"] = alpha[i, 0]

        # Beta update (HLRF recursion)
        g_y = obj(x_k, args) if args is not None else obj(x_k)
        beta_k1 = beta_k + g_y / (np.matrix_transpose(g_diff_y) @ alpha)
        row["β_k+1"] = beta_k1[0, 0]

        # y_k and x_k update
        y_k1 = -alpha @ beta_k1
        for i in range(n):
            row[f"y_{i},k+1"] = y_k1[i, 0]
        x_k1 = parepyco.y_to_x(y_k1, dneq, mu_vars).flatten().tolist()
        for i in range(n):
            row[f"x_{i},k+1"] = x_k1[i]

        # Storage and error
        x_k = x_k1.copy()
        y_k = y_k1.copy()
        iteration += 1
        if beta_k == 0.0:
            beta_k = tol * 1E1
        error = np.abs(beta_k1[0, 0] - beta_k) / beta_k
        row["error"] = error

        # Verbose
        if verbose:
            elapsed_time = time.perf_counter() - start_time
            print(f"⏱️ Time: {elapsed_time:.4e}s, Iteration {iteration} (error = {error:.4e})")
        results.append(row)

    df = pd.DataFrame(results)

    # Ordering columns
    col_order = (
        [f"x_{i},k" for i in range(n)]
        + [f"y_{i},k" for i in range(n)]
        + ["β_k"]
        + [f"α_{i},k" for i in range(n)]
        + ["β_k+1"]
        + [f"y_{i},k+1" for i in range(n)]
        + [f"x_{i},k+1" for i in range(n)]
        + ["error"]
    )
    results = df[col_order]

    # The last row contains the final beta value; the probability of failure follows from it
    if verbose:
        print("🧮 Computes β and pf")
    final_beta = df["β_k+1"].iloc[-1]
    final_pf = parepyco.pf_equation(final_beta)

    # SORM correction (commented-out draft, not yet enabled)
    # hessian = parepyco.hessian_matrix(obj, x: list, method: str, h: float = 1E-5, args: Optional[tuple] = None)
    # if method.lower() == "sorm":
    #     beta_u = beta_k1[0, 0]
    #     mu_eq = []
    #     sigma_eq = []
    #     # Conversion Non-normal to Normal
    #     for i, var in enumerate(variables):
    #         paras_scipy = parepydi.convert_params_to_scipy(var['type'], var['parameters'])
    #         m, s = parepydi.normal_tail_approximation(var['type'], paras_scipy, x_k[i])
    #         mu_eq.append(m)
    #         sigma_eq.append(s)
    #     dneq, dneq1 = parepyco.std_matrix(sigma_eq)
    #     mu_vars = parepyco.mu_matrix(mu_eq)
    #     # Numerical differentiation g(y)
    #     g_diff_x = parepyco.jacobian_matrix(obj, x_k, 'center', h=1E-8, args=args) if args is not None else parepyco.jacobian_matrix(obj, x_k, 'center', h=1E-8)
    #     g_diff_y = np.matrix_transpose(dneq) @ np.array(g_diff_x).reshape(-1, 1)
    #     norm_gdiff = np.linalg.norm(g_diff_y)
    #     m = len(x_k)
    #     q = np.eye(m)
    #     q[:, 0] = y_k.flatten().tolist()
    #     q, _ = np.linalg.qr(q)
    #     q = np.fliplr(q)
    #     a = q.T @ hessian @ q
    #     j = np.eye(m - 1) + beta_u * a[:m-1, :m-1] / norm_gdiff
    #     det_j = np.linalg.det(j)
    #     correction = 1 / np.sqrt(det_j)
    #     pf_sorm = sc.stats.norm.cdf(-beta_u) * correction
    #     beta_sorm = -sc.stats.norm.ppf(pf_sorm)

    if verbose:
        print("✔️ Algorithm finished!")

    return results, final_pf, final_beta
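
# Illustration (not part of the public API): a minimal, self-contained sketch of the
# HLRF recursion implemented by the loop above, specialized to independent normal
# variables so that the normal tail approximation is exact and y = (x - mu) / sigma.
# The limit state g(x) = 12.5*x0**3 - x1 and its analytic gradient are taken from the
# docstring example; everything else is an assumption made for this sketch.
#
# import numpy as np
#
# def hlrf_demo(mu, sigma, x0, tol=1e-8, max_iter=100):
#     g = lambda x: 12.5 * x[0] ** 3 - x[1]
#     grad = lambda x: np.array([37.5 * x[0] ** 2, -1.0])   # analytic dg/dx
#     x = np.asarray(x0, dtype=float)
#     beta = 0.0
#     for _ in range(max_iter):
#         y = (x - mu) / sigma                     # map to standard normal space
#         grad_y = grad(x) * sigma                 # chain rule: dg/dy = (dg/dx) * sigma
#         norm = np.linalg.norm(grad_y)
#         alpha = grad_y / norm                    # unit gradient direction
#         beta_new = np.linalg.norm(y) + g(x) / norm   # beta_{k+1} = beta_k + g / ||grad g(y)||
#         y = -alpha * beta_new                    # next iterate on the ray -beta * alpha
#         x = mu + sigma * y                       # back to physical space
#         if abs(beta_new - beta) / max(abs(beta_new), tol) < tol:
#             break
#         beta = beta_new
#     return beta_new
#
# # hlrf_demo(np.array([1.0, 10.0]), np.array([0.1, 1.0]), [1.0, 10.0]) converges to the
# # same beta as deterministic_algorithm_structural_analysis on the docstring example.
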
def sampling_algorithm_structural_analysis(obj: Callable, random_var_settings: list, method: str, n_samples: int, number_of_limit_functions: int, parallel: bool = True, verbose: bool = False, random_var_settings_importance_sampling: Optional[list] = None, args: Optional[tuple] = None) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Computes the reliability index and probability of failure using sampling methods.

    :param obj: The objective function: obj(x, args) -> float or obj(x) -> float, where x is a list with shape n and args is a tuple of fixed parameters needed to completely specify the function.
    :param random_var_settings: List of dictionaries containing the distribution type and parameters. Example: {'type': 'normal', 'parameters': {'mean': 0, 'std': 1}}. Supported distributions: (a) 'uniform': keys 'min' and 'max', (b) 'normal': keys 'mean' and 'std', (c) 'lognormal': keys 'mean' and 'std', (d) 'gumbel max': keys 'mean' and 'std', (e) 'gumbel min': keys 'mean' and 'std', (f) 'triangular': keys 'min', 'mode' and 'max', or (g) 'gamma': keys 'mean' and 'std'.
    :param method: Sampling method. Supported values: 'lhs' (Latin Hypercube Sampling), 'mcs' (Crude Monte Carlo Sampling) or 'sobol' (Sobol Sampling).
    :param n_samples: Number of samples. For Sobol sequences, this variable represents the exponent "m" (n = 2^m).
    :param number_of_limit_functions: Number of limit state functions or constraints.
    :param parallel: If True, runs the sampling blocks in parallel processes.
    :param verbose: If True, prints detailed information about the process. Default is `False`.
    :param random_var_settings_importance_sampling: Sampling distributions used for importance sampling (optional). When given, samples are drawn from these distributions and reweighted with respect to random_var_settings.
    :param args: Extra arguments to pass to the objective function (optional).

    :return: Results of the reliability analysis. output[0] = Sampling results (random variable samples, limit state values and indicator functions), output[1] = probability of failure for each indicator function, output[2] = reliability index for each indicator function.

    Example
    ==============

    >>> # pip install -U parepy-toolbox
    >>> from parepy_toolbox import sampling_algorithm_structural_analysis
    >>> def obj(x):
    ...     return [12.5 * x[0]**3 - x[1]]
    >>> d = {'type': 'normal', 'parameters': {'mean': 1., 'std': 0.1}}
    >>> l = {'type': 'normal', 'parameters': {'mean': 10., 'std': 1.}}
    >>> var = [d, l]
    >>> df, pf, beta = sampling_algorithm_structural_analysis(obj, var, method='lhs', n_samples=10000, number_of_limit_functions=1, parallel=False, verbose=True)
    >>> print(pf)
    >>> print(beta)
    >>> print(df.head())
    """
    block_size = 100
    ran_var_set = random_var_settings_importance_sampling if random_var_settings_importance_sampling is not None else random_var_settings

    # Build one sampling setup per block ('sobol' runs as a single block)
    if method != 'sobol':
        samples_per_block = n_samples // block_size
        samples_per_block_remainder = n_samples % block_size
        setups = [(obj, ran_var_set, method, samples_per_block, number_of_limit_functions, args) for _ in range(block_size)] if args is not None else [(obj, ran_var_set, method, samples_per_block, number_of_limit_functions) for _ in range(block_size)]
        if samples_per_block_remainder > 0:
            setups.append((obj, ran_var_set, method, samples_per_block_remainder, number_of_limit_functions, args) if args is not None else (obj, ran_var_set, method, samples_per_block_remainder, number_of_limit_functions))
    else:
        parallel = False
        setups = [(obj, ran_var_set, method, n_samples, number_of_limit_functions, args) if args is not None else (obj, ran_var_set, method, n_samples, number_of_limit_functions)]

    # Random sampling and evaluation of the G functions
    start_time = time.perf_counter()
    if parallel:
        with Pool() as pool:
            results = pool.starmap(parepyco.sampling_kernel_without_time, setups)
    else:
        results = [parepyco.sampling_kernel_without_time(*args_aux) for args_aux in setups]
    end_time = time.perf_counter()
    final_df = pd.concat(results, ignore_index=True)
    if verbose:
        print(f"Sampling and evaluation of the G functions took {end_time - start_time:.2f} seconds.")

    # Computes pf and beta (reweighting the samples first when importance sampling is used)
    if random_var_settings_importance_sampling is not None:
        final_df = calculate_weights(final_df, random_var_settings, random_var_settings_importance_sampling)
    pf_df, beta_df = parepyco.summarize_pf_beta(final_df)

    if verbose:
        filename = f"sampling_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
        final_df.to_csv(filename, sep="\t", index=False)
        print(f"File '{filename}' has been successfully saved.")
        print("✔️ Algorithm finished!")

    return final_df, pf_df, beta_df
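
# Illustration (assumption, not the library's verified source): given the column
# layout of final_df, with one indicator column "I_j" per limit state equal to 1 on
# failure and 0 otherwise, a summary step such as parepyco.summarize_pf_beta is
# expected to reduce to the standard crude Monte Carlo estimators sketched below.
#
# import pandas as pd
# from scipy.stats import norm
#
# def summarize_pf_beta_sketch(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
#     rows_pf, rows_beta = [], []
#     for col in (c for c in df.columns if c.startswith("I_")):
#         pf = df[col].mean()     # pf ≈ E[I] = (number of failures) / (number of samples)
#         beta = -norm.ppf(pf)    # reliability index: beta = -Phi^{-1}(pf)
#         rows_pf.append({"limit state": col, "pf": pf})
#         rows_beta.append({"limit state": col, "beta": beta})
#     return pd.DataFrame(rows_pf), pd.DataFrame(rows_beta)
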
def reprocess_sampling_results(folder_path: str, verbose: bool = False) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Reprocesses sampling results from multiple .txt files in a specified folder.

    :param folder_path: Path to the folder containing sampling result files (.txt format).
    :param verbose: If True, prints detailed information about the process. Default is `False`.

    :return: Results of reprocessing: output[0] = Combined dataframe with all sampling data, output[1] = Failure probabilities for each limit state function, output[2] = Reliability index (beta) for each limit state function.

    Example
    ==============

    >>> # pip install -U parepy-toolbox
    >>> from parepy_toolbox import reprocess_sampling_results
    >>> df, pf, beta = reprocess_sampling_results("path/to/your/folder", verbose=True)
    >>> print("PF:", pf)
    >>> print("Beta:", beta)
    >>> df.head()
    """
    start_time = time.perf_counter()

    # Load every tab-separated .txt file in the folder and stack the samples
    all_files = [f for f in os.listdir(folder_path) if f.endswith(".txt")]
    dataframes = []
    for file in all_files:
        file_path = os.path.join(folder_path, file)
        df = pd.read_csv(file_path, sep="\t")
        dataframes.append(df)
    final_df = pd.concat(dataframes, ignore_index=True)
    if verbose:
        print(f"🧮 {len(dataframes)} files loaded. Total number of samples: {len(final_df)}.")

    # Computes pf and beta from the indicator columns
    pf_df, beta_df = parepyco.summarize_failure_probabilities(final_df)
    end_time = time.perf_counter()
    if verbose:
        print("Reprocessing completed successfully.")
        print(f"🧮 Reprocessing took {end_time - start_time:.2f} seconds.")
        print("✔️ Algorithm finished!")
    return final_df, pf_df, beta_df
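
# Illustration: reprocess_sampling_results expects each .txt file to be a
# tab-separated dump like the one sampling_algorithm_structural_analysis writes when
# verbose=True. A minimal hand-made folder (column names follow the "X_i"/"G_j"/"I_j"
# convention used in this module; the numeric values are arbitrary):
#
# import os
# import pandas as pd
#
# os.makedirs("runs", exist_ok=True)
# df_run = pd.DataFrame({"X_0": [1.02, 0.97], "X_1": [9.8, 10.4],
#                        "G_0": [2.95, -1.28], "I_0": [0, 1]})
# df_run.to_csv(os.path.join("runs", "sampling_run_1.txt"), sep="\t", index=False)
# # df_all, pf_df, beta_df = reprocess_sampling_results("runs", verbose=True)
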
def sobol_algorithm(obj: Callable, random_var_settings: list, n_sobol: int, number_of_limit_functions: int, parallel: bool = False, verbose: bool = False, args: Optional[tuple] = None) -> pd.DataFrame:
    """
    Calculates the Sobol sensitivity indices in structural reliability problems.

    :param obj: The objective function: ``obj(x, args) -> float`` or ``obj(x) -> float``, where ``x`` is a list with shape ``n`` and ``args`` is a tuple of fixed parameters needed to completely specify the function.
    :param random_var_settings: List of dictionaries containing the distribution type and parameters. Example: ``{'type': 'normal', 'parameters': {'mean': 0, 'std': 1}}``. Supported distributions:

        - ``uniform``: keys ``min`` and ``max``
        - ``normal``: keys ``mean`` and ``std``
        - ``lognormal``: keys ``mean`` and ``std``
        - ``gumbel max``: keys ``mean`` and ``std``
        - ``gumbel min``: keys ``mean`` and ``std``
        - ``triangular``: keys ``min``, ``mode`` and ``max``
        - ``gamma``: keys ``mean`` and ``std``

    :param n_sobol: Exponent "m" (``n = 2^m``) used to generate the Sobol sequence sampling. Must be a positive integer.
    :param number_of_limit_functions: Number of limit state functions or constraints.
    :param parallel: If True, runs the evaluation in parallel processes.
    :param verbose: If True, prints detailed information about the process.
    :param args: Extra arguments to pass to the objective function (optional).

    :return: A pandas DataFrame with the first-order (``s_i``) and total-order (``s_t``) Sobol sensitivity indices for each input variable.

    Example
    =======

    >>> # pip install -U parepy-toolbox
    >>> from parepy_toolbox import sobol_algorithm
    >>> import time
    >>> def obj(x, *args):
    ...     g_0 = 12.5 * x[0] ** 3 - x[1]
    ...     time.sleep(1e-6)
    ...     return [g_0]
    >>> d = {'type': 'normal', 'parameters': {'mean': 1.0, 'std': 0.1}}
    >>> l = {'type': 'normal', 'parameters': {'mean': 10.0, 'std': 1.0}}
    >>> var = [d, l]
    >>> amostras = 12
    >>> data_sobol = sobol_algorithm(obj=obj, random_var_settings=var, n_sobol=amostras, number_of_limit_functions=1, verbose=True)
    >>> data_sobol.head()
    """
    if verbose:
        print("🧮 Starting Sobol analysis...")

    # Sobol sequence sampling for the two independent matrices A and B
    start_time = time.perf_counter()
    dist_a, _, _ = sampling_algorithm_structural_analysis(obj, random_var_settings, 'sobol', n_sobol, number_of_limit_functions, parallel=parallel, verbose=verbose, args=args)
    dist_b, _, _ = sampling_algorithm_structural_analysis(obj, random_var_settings, 'sobol', n_sobol, number_of_limit_functions, parallel=parallel, verbose=verbose, args=args)

    # Pick-freeze estimators (first limit state function, G_0)
    n_samples = 2 ** n_sobol
    y_a = dist_a['G_0'].to_list()
    y_b = dist_b['G_0'].to_list()
    f_0_2 = (sum(y_a) / n_samples) ** 2
    A = dist_a.drop(['G_0', 'I_0'], axis=1).to_numpy()
    B = dist_b.drop(['G_0', 'I_0'], axis=1).to_numpy()
    K = A.shape[1]
    s_i = []
    s_t = []
    for i in range(K):
        # C_i: matrix B with column i replaced by column i of A
        C = np.copy(B)
        C[:, i] = A[:, i]
        y_c_i = []
        for j in range(n_samples):
            g = obj(list(C[j, :]), args) if args is not None else obj(list(C[j, :]))
            y_c_i.append(g[0])
        y_a_dot_y_c_i = [y_a[m] * y_c_i[m] for m in range(n_samples)]
        y_b_dot_y_c_i = [y_b[m] * y_c_i[m] for m in range(n_samples)]
        y_a_dot_y_a = [y_a[m] * y_a[m] for m in range(n_samples)]
        var_y = (1 / n_samples) * sum(y_a_dot_y_a) - f_0_2
        s_i.append(((1 / n_samples) * sum(y_a_dot_y_c_i) - f_0_2) / var_y)
        s_t.append(1 - ((1 / n_samples) * sum(y_b_dot_y_c_i) - f_0_2) / var_y)

    end_time = time.perf_counter()
    if verbose:
        print(f"🧮 Sobol analysis completed in {end_time - start_time:.2f} seconds.")
        print("✔️ Algorithm finished!")
    df_sobol = pd.DataFrame({'s_i': s_i, 's_t': s_t})
    return df_sobol
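
# Illustration of the pick-freeze estimators used above (Sobol'/Homma-Saltelli form)
# on a toy additive model y = 3*x0 + 1*x1 with independent standard normal inputs,
# for which the exact indices are S_0 = 9/10, S_1 = 1/10 and S_i = S_Ti. This sketch
# uses plain pseudo-random sampling instead of the toolbox's Sobol sequences.
#
# import numpy as np
#
# rng = np.random.default_rng(42)
# n = 100_000
# f = lambda X: 3.0 * X[:, 0] + 1.0 * X[:, 1]
# A = rng.standard_normal((n, 2))
# B = rng.standard_normal((n, 2))
# y_a, y_b = f(A), f(B)
# f0_sq = y_a.mean() ** 2
# var_y = (y_a ** 2).mean() - f0_sq
# for i in range(2):
#     C = B.copy()
#     C[:, i] = A[:, i]                                   # "pick" column i from A, "freeze" the rest
#     y_c = f(C)
#     s_i = ((y_a * y_c).mean() - f0_sq) / var_y          # first-order index
#     s_t = 1.0 - ((y_b * y_c).mean() - f0_sq) / var_y    # total-order index
#     print(f"variable {i}: s_i ≈ {s_i:.3f}, s_t ≈ {s_t:.3f}")   # expect ≈ 0.9 and 0.1
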
def generate_factorial_design(variable_names: List[str], levels_per_variable: List[List[float]], verbose: bool = False) -> pd.DataFrame:
    """
    Generates a full factorial design based on variable names and levels.

    :param variable_names: Variable names.
    :param levels_per_variable: List of lists, where each sublist contains the levels for the corresponding variable.
    :param verbose: If True, prints the number of combinations and a preview of the DataFrame.

    :return: All possible combinations of the levels provided.

    **Example**::

        >>> # pip install -U parepy-toolbox
        >>> from parepy_toolbox import generate_factorial_design
        >>> import numpy as np
        >>> variable_names = ['i (mm)', 'j (mm)', 'k (mm)', 'l (mm)']
        >>> levels_per_variable = [
        ...     np.linspace(0, 10, 3),
        ...     np.linspace(0, 15, 4),
        ...     [5, 15],
        ...     np.linspace(0, 20, 5)
        ... ]
        >>> df = generate_factorial_design(variable_names, levels_per_variable, verbose=True)
        >>> print(df.head())
    """
    if verbose:
        print("🧮 Generating factorial design...")
        for name, levels in zip(variable_names, levels_per_variable):
            print(f"   - {name}: {levels}")
    combinations = list(itertools.product(*levels_per_variable))
    df = pd.DataFrame(combinations, columns=variable_names)
    if verbose:
        print(f"🧮 Generated {len(df)} combinations.")
        print("🧮 Sample of factorial design:")
        print(df.head())
        print("✔️ Algorithm finished!")
    return df
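
# Illustration of a typical follow-up: evaluating a response over the generated grid
# and appending it as a new column. The response used here, the second moment of area
# of a rectangle (b*h**3/12), is just an example.
#
# df = generate_factorial_design(['h (mm)', 'b (mm)'], [[100, 200, 300], [50, 75]])
# df['I (mm^4)'] = df['b (mm)'] * df['h (mm)'] ** 3 / 12
# print(df)
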
# def sampling_algorithm_structural_analysis_kernel(objective_function: callable, number_of_samples: int, numerical_model: dict, variables_settings: list, number_of_limit_functions: int, none_variable=None) -> pd.DataFrame:
#     """
#     Creates samples and evaluates the limit state functions in structural reliability problems.
#
#     :param objective_function: User-defined Python function to evaluate the limit state(s).
#     :param number_of_samples: Number of samples to generate.
#     :param numerical_model: Dictionary with model configuration (e.g., sampling type).
#     :param variables_settings: List of variable definitions with distribution parameters.
#     :param number_of_limit_functions: Number of limit state functions or constraints.
#     :param none_variable: Optional auxiliary input to be passed to the objective function.
#
#     :return: DataFrame with reliability analysis results.
#     """
#     # Ensure all variables have seeds
#     for var in variables_settings:
#         if 'seed' not in var:
#             var['seed'] = None
#     n_dimensions = len(variables_settings)
#     algorithm = numerical_model['model sampling']
#     is_time_analysis = algorithm.upper() in ['MCS-TIME', 'MCS_TIME', 'MCS TIME', 'LHS-TIME', 'LHS_TIME', 'LHS TIME']
#     time_analysis = numerical_model['time steps'] if is_time_analysis else None
#
#     # Generate samples
#     dataset_x = parepyco.sampling(
#         n_samples=number_of_samples,
#         model=numerical_model,
#         variables_setup=variables_settings
#     )
#
#     # Initialize output arrays
#     capacity = np.zeros((len(dataset_x), number_of_limit_functions))
#     demand = np.zeros((len(dataset_x), number_of_limit_functions))
#     state_limit = np.zeros((len(dataset_x), number_of_limit_functions))
#     indicator_function = np.zeros((len(dataset_x), number_of_limit_functions))
#
#     # Evaluate objective function
#     for idx, sample in enumerate(dataset_x):
#         c_i, d_i, g_i = objective_function(list(sample), none_variable)
#         capacity[idx, :] = c_i
#         demand[idx, :] = d_i
#         state_limit[idx, :] = g_i
#         indicator_function[idx, :] = [1 if val <= 0 else 0 for val in g_i]
#
#     # Stack all results
#     results = np.hstack((dataset_x, capacity, demand, state_limit, indicator_function))
#
#     # Format results into DataFrame
#     if is_time_analysis:
#         block_size = int(len(results) / number_of_samples)
#         all_rows = []
#         for i in range(number_of_samples):
#             block = results[i * block_size:(i + 1) * block_size, :].T.flatten().tolist()
#             all_rows.append(block)
#         results_about_data = pd.DataFrame(all_rows)
#     else:
#         results_about_data = pd.DataFrame(results)
#
#     # Create column names
#     column_names = []
#     for i in range(n_dimensions):
#         if is_time_analysis:
#             for t in range(time_analysis):
#                 column_names.append(f'X_{i}_t={t}')
#         else:
#             column_names.append(f'X_{i}')
#     if is_time_analysis:
#         for t in range(time_analysis):
#             column_names.append(f'STEP_t_{t}')
#     for i in range(number_of_limit_functions):
#         if is_time_analysis:
#             for t in range(time_analysis):
#                 column_names.append(f'R_{i}_t={t}')
#         else:
#             column_names.append(f'R_{i}')
#     for i in range(number_of_limit_functions):
#         if is_time_analysis:
#             for t in range(time_analysis):
#                 column_names.append(f'S_{i}_t={t}')
#         else:
#             column_names.append(f'S_{i}')
#     for i in range(number_of_limit_functions):
#         if is_time_analysis:
#             for t in range(time_analysis):
#                 column_names.append(f'G_{i}_t={t}')
#         else:
#             column_names.append(f'G_{i}')
#     for i in range(number_of_limit_functions):
#         if is_time_analysis:
#             for t in range(time_analysis):
#                 column_names.append(f'I_{i}_t={t}')
#         else:
#             column_names.append(f'I_{i}')
#     results_about_data.columns = column_names
#
#     # First Barrier Failure (FBF) adjustment if time-dependent
#     if is_time_analysis:
#         results_about_data, _ = parepyco.fbf(
#             algorithm,
#             number_of_limit_functions,
#             time_analysis,
#             results_about_data
#         )
#     return results_about_data