Source code for modina.pipeline

from modina.context_net_inference import compute_context_scores
from modina.diff_net_construction import compute_diff_network
from modina.edge_filtering import filter
from modina.ranking import compute_ranking

import json
import logging
import os
from typing import Tuple, Optional
import pandas as pd


# Wrapper function to perform the whole moDiNA pipeline
def diffnet_analysis(context1: pd.DataFrame,
                     context2: pd.DataFrame,
                     meta_file: pd.DataFrame,
                     edge_metric: Optional[str] = None,
                     node_metric: Optional[str] = None,
                     ranking_alg: str = 'PageRank+',
                     filter_method: Optional[str] = None,
                     filter_param: float = 0.0,
                     filter_metric: Optional[str] = None,
                     filter_rule: Optional[str] = None,
                     max_path_length: int = 2,
                     test_type: str = 'nonparametric',
                     nan_value: Optional[int] = None,
                     correction: str = 'bh',
                     num_workers: int = 1,
                     project_path: Optional[str] = None,
                     name1: str = 'context1',
                     name2: str = 'context2'
                     ) -> Tuple[list, dict, Optional[pd.DataFrame], Optional[pd.DataFrame], dict]:
    """
    Wrapper function to perform an end-to-end differential network analysis following the moDiNA pipeline.

    The pipeline runs four stages in order: association score computation for both contexts,
    optional edge filtering, differential network construction, and ranking.

    :param context1: Observed data of Context 1 (rows: samples, columns: variables).
    :param context2: Observed data of Context 2 (rows: samples, columns: variables).
    :param meta_file: Metadata file containing a 'label' and 'type' column to specify the data type of each variable.
    :param edge_metric: Edge metric used to construct the differential network.
    :param node_metric: Node metric used to construct the differential network.
    :param ranking_alg: Ranking algorithm to compute. Options are 'PageRank+', 'PageRank', 'absDimontRank',
        'DimontRank', 'direct_node' and 'direct_edge'. Defaults to 'PageRank+'.
    :param filter_method: Method used for filtering. Defaults to None.
    :param filter_param: Parameter for the specified filtering method. Defaults to 0.0.
    :param filter_metric: Edge metric used for filtering. Defaults to None.
    :param filter_rule: Rule to integrate the networks during filtering. Defaults to None.
    :param max_path_length: Maximum length of paths to consider in the computation of integrated interaction
        scores. Defaults to 2.
    :param test_type: Type of statistical tests to use for association score calculation.
        Defaults to 'nonparametric'.
    :param nan_value: Numerical value used for NaN values in the context data. If None, an error will be raised
        if such values are present. Defaults to None.
    :param correction: Correction method for multiple testing. Defaults to 'bh'.
    :param num_workers: Number of workers for parallel processing. Defaults to 1.
    :param project_path: Optional path to save results. If given, the directory is created when missing, the
        configuration is written to ``config.json`` inside it, and output locations within it are handed to the
        individual pipeline steps. Defaults to None.
    :param name1: Name of Context 1. Used for saving files. Defaults to 'context1'.
    :param name2: Name of Context 2. Used for saving files. Defaults to 'context2'.
    :return: A tuple (ranking, rankings_per_type, edges_diff, nodes_diff, config) containing the computed ranking,
        the per-type rankings as returned by ``compute_ranking``, the differential edges, the differential nodes,
        and the configuration parameters. ``max_path_length`` is only part of the configuration when
        ``edge_metric`` is 'int-IS'.
    """
    # Resolve output locations; everything stays None when nothing should be saved.
    if project_path is not None:
        os.makedirs(project_path, exist_ok=True)
        config_path = os.path.join(project_path, 'config.json')
        scores1_path = os.path.join(project_path, f'{name1}_scores.csv')
        scores2_path = os.path.join(project_path, f'{name2}_scores.csv')
        ranking_path = os.path.join(project_path, f'{ranking_alg}.csv')
    else:
        config_path = None
        scores1_path = None
        scores2_path = None
        ranking_path = None

    # Score calculation (identical settings for both contexts, only data and output path differ)
    logging.info('Computing association scores...')
    score_kwargs = {
        'meta_file': meta_file,
        'test_type': test_type,
        'correction': correction,
        'num_workers': num_workers,
        'nan_value': nan_value,
    }
    scores1 = compute_context_scores(context_data=context1, path=scores1_path, **score_kwargs)
    scores2 = compute_context_scores(context_data=context2, path=scores2_path, **score_kwargs)
    logging.info('Done.')

    # Filtering: only triggered when at least one filter argument is truthy
    # (the default filter_param of 0.0 does not count as "specified").
    if any((filter_method, filter_metric, filter_rule, filter_param)):
        logging.info('Edge filtering...')
        # NOTE: `filter` is modina.edge_filtering.filter, not the Python builtin.
        scores1_filtered, scores2_filtered, context1_filtered, context2_filtered = filter(
            context1=context1,
            context2=context2,
            scores1=scores1,
            scores2=scores2,
            filter_method=filter_method,
            filter_param=filter_param,
            filter_metric=filter_metric,
            filter_rule=filter_rule,
        )
        logging.info('Done.')
    else:
        logging.warning('The differential network will be computed based on unfiltered data. No filter parameters were specified.')
        # Work on copies so the later steps cannot modify the caller's objects.
        scores1_filtered = scores1.copy()
        scores2_filtered = scores2.copy()
        context1_filtered = context1.copy()
        context2_filtered = context2.copy()

    # Differential network computation
    logging.info('Computing differential network...')
    edges_diff, nodes_diff = compute_diff_network(
        scores1=scores1_filtered,
        scores2=scores2_filtered,
        context1=context1_filtered,
        context2=context2_filtered,
        edge_metric=edge_metric,
        node_metric=node_metric,
        max_path_length=max_path_length,
        correction=correction,
        path=project_path,
        format='csv',
        meta_file=meta_file,
        test_type=test_type,
        nan_value=nan_value,
    )
    logging.info('Done.')

    # Ranking
    logging.info('Computing ranking...')
    ranking, rankings_per_type = compute_ranking(
        edges_diff=edges_diff,
        nodes_diff=nodes_diff,
        ranking_alg=ranking_alg,
        path=ranking_path,
        meta_file=meta_file,
    )
    logging.info('Done.')

    # Create config dict
    params = {
        'name1': name1,
        'name2': name2,
        'test_type': test_type,
        'correction': correction,
        'filter_method': filter_method,
        'filter_metric': filter_metric,
        'filter_rule': filter_rule,
        'filter_param': filter_param,
        'edge_metric': edge_metric,
        'node_metric': node_metric,
        'ranking_alg': ranking_alg,
    }

    # The path length is only recorded for the integrated interaction score metric.
    if edge_metric == 'int-IS':
        params['max_path_length'] = max_path_length

    if config_path is not None:
        # Explicit encoding keeps the written config independent of the platform locale.
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(params, f, indent=4)

    return ranking, rankings_per_type, edges_diff, nodes_diff, params