Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| # # IV. MARKERS TRESHOLDS NOTEBOOK | |
| # ## IV.1. PACKAGES IMPORT | |
| import os | |
| import random | |
| import re | |
| import pandas as pd | |
| import numpy as np | |
| import seaborn as sb | |
| import matplotlib.pyplot as plt | |
| import matplotlib.colors as mplc | |
| import subprocess | |
| import warnings | |
| import panel as pn | |
| import json | |
| from scipy import signal | |
| from scipy.stats import pearsonr | |
| import plotly.figure_factory as ff | |
| import plotly | |
| import plotly.graph_objs as go | |
| from plotly.subplots import make_subplots | |
| from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot | |
| import plotly.express as px | |
import sys
sys.setrecursionlimit(5000)  # raise the default (1000): deep recursion can occur in downstream clustering/plot helpers
from my_modules import *  # project helpers (e.g. compare_headers, color_dict_to_df) — not stdlib
# Silence FutureWarnings & UserWarnings emitted by the dataframe/plotting libraries
warnings.filterwarnings('ignore', category= FutureWarning)
warnings.filterwarnings('ignore', category= UserWarning)
# ## IV.2. *DIRECTORIES
# Set base directory
#input_path = '/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431'
#set_path = 'test'
# All paths are resolved relative to this script's own directory.
present_dir = os.path.dirname(os.path.realpath(__file__))
# stored_variables.json carries the run configuration produced by the earlier notebooks.
stored_variables_path = os.path.join(present_dir,'stored_variables.json')
with open(stored_variables_path, 'r') as file:
    stored_vars = json.load(file)
directory = stored_vars['base_dir']
input_path = os.path.join(present_dir,directory)
set_path = stored_vars['set_path']
selected_metadata_files = stored_vars['selected_metadata_files']
ls_samples = stored_vars['ls_samples']
base_dir = input_path
set_name = set_path
project_name = set_name  # Project name
step_suffix = 'mt'  # Current part (here part IV, "markers thresholds")
previous_step_suffix_long = "_zscore"  # Previous part (here ZSCORE NOTEBOOK)
# Initial input data directory (output of the ZSCORE step)
input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)
# Output directory for this step
output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
# Images subdirectory of the output directory
output_images_dir = os.path.join(output_data_dir,"images")
# Metadata directory and its images subdirectory
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
metadata_images_dir = os.path.join(metadata_dir,"images")
# Create directories if they don't already exist (currently disabled)
#for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
#    if not os.path.exists(d):
#        print("Creation of the" , d, "directory...")
#        os.makedirs(d)
#    else :
#        print("The", d, "directory already exists !")
#os.chdir(input_data_dir)
# Verify paths (debug prints, disabled)
#print('base_dir :', base_dir)
#print('input_data_dir :', input_data_dir)
#print('output_data_dir :', output_data_dir)
#print('output_images_dir :', output_images_dir)
#print('metadata_dir :', metadata_dir)
#print('metadata_images_dir :', metadata_images_dir)
# ## IV.3. FILES
# ### IV.3.1. METADATA
# Marker metadata: one row per round/target/channel combination.
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)
# Open, read in information
metadata = pd.read_csv(filename)
# Verify headers match what the downstream code expects (helper from my_modules)
exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")
metadata = metadata.dropna()  # drop incomplete marker rows
metadata.head()
# ### IV.3.2. NOT_INTENSITIES
# Column names that are NOT marker intensities (positions, sizes, IDs, ...),
# one per line in the file.
filename = "not_intensities.csv"
filename = os.path.join(metadata_dir, filename)
not_intensities = []
with open(filename, 'r') as fh:
    # take str, strip whitespace, split on new line character
    not_intensities = fh.read().strip().split("\n")
# ### IV.3.3. FULL_TO_SHORT_COLUMN_NAMES
# Mapping of verbose column names -> short display names.
filename = "full_to_short_column_names.csv"
filename = os.path.join(metadata_dir, filename)
# Open, read in information
df = pd.read_csv(filename, header = 0)
print("Verifying data read from file is the correct length...\n")
# Turn the two-column frame into a {full_name: short_name} dictionary.
full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]
# ### IV.3.4. SHORT_TO_FULL_COLUMN_NAMES
# Inverse mapping: short display names -> verbose column names.
filename = "short_to_full_column_names.csv"
filename = os.path.join(metadata_dir, filename)
# Open, read in information
df = pd.read_csv(filename, header = 0)
# Turn the two-column frame into a {short_name: full_name} dictionary.
short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]
# ### IV.3.10. DATA
# Discover the z-scored per-sample CSV files produced by the previous (ZSCORE) step.
if os.path.exists(input_data_dir):
    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_zscore.csv")]

# Read every sample file into dfs, keyed by file name.
dfs = {}

# Use the first sample's header row as the reference column order for all files.
df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col = 0, nrows = 1)
expected_headers = df.columns.values

###############################
# !! This may take a while !! #
###############################
# Iterate over a COPY of ls_samples: the original code removed bad samples from
# the list while iterating it, which silently skips the element that follows
# each removal. It also stored the previous iteration's stale frame under the
# failed sample's name — both fixed here.
for sample in list(ls_samples):
    file_path = os.path.join(input_data_dir, sample)
    try:
        df = pd.read_csv(file_path, index_col=0)
    except pd.errors.EmptyDataError:
        # Unreadable/empty file: drop the sample from the analysis entirely.
        ls_samples.remove(sample)
        continue
    if df.empty:
        # Parsable but empty frame: exclude it as well instead of keeping a stale df.
        ls_samples.remove(sample)
        continue
    # Reorder the columns to match the expected headers list before merging.
    dfs[sample] = df.reindex(columns=expected_headers)

# Merge all per-sample frames into one df (original indexes preserved).
df = pd.concat(dfs.values(), ignore_index=False , sort = False)
del dfs
print(df.head())
intial_df = pn.pane.DataFrame(df.head(40), width = 2500)
# ### Marker Classification
# ## IV.5. *DOTPLOTS
df

# Load existing data from stored_variables.json with error handling
try:
    with open(stored_variables_path, 'r') as file:
        data = json.load(file)
except json.JSONDecodeError as e:
    data = {}

df
df.head()

# ### IV.7.2. DOTPLOTS-DETERMINED TRESHOLD
def _ensure_stored_key(path, key):
    """Ensure *key* exists in the stored-variables JSON file at *path*.

    Loads the file (an absent file yields an empty dict); if *key* is
    missing, adds it as an empty dict and rewrites the file. Returns the
    loaded (possibly updated) dict.
    """
    try:
        with open(path, 'r') as f:
            stored = json.load(f)
    except FileNotFoundError:
        stored = {}
    if key not in stored:
        stored[key] = {}
        with open(path, 'w') as f:
            json.dump(stored, f, indent=4)
    return stored

# Empty dicts in stored_variables to hold the cell type / cell subtype
# classification for each marker (previously two near-identical copy-pasted
# cells, the second of which confusingly reused the cell_type_classification
# variable name for the subtype entry).
stored_variables = _ensure_stored_key(stored_variables_path, 'cell_type_classification')
stored_variables = _ensure_stored_key(stored_variables_path, 'cell_subtype_classification')

df
data = df
import json
import panel as pn
# Load existing stored variables
with open(stored_variables_path, 'r') as f:
    stored_variables = json.load(f)
# One FloatInput widget per marker so the user can type a per-marker threshold.
threshold_inputs = {}
for marker in stored_variables['markers']:
    threshold_inputs[marker] = pn.widgets.FloatInput(name=f'{marker} Threshold', value=0.0, step=0.1)
# Re-load stored_variables.json (tolerating a missing file on a first run)
try:
    with open(stored_variables_path, 'r') as f:
        stored_variables = json.load(f)
except FileNotFoundError:
    stored_variables = {}
# Seed a 'thresholds' entry from the widgets' initial values if absent.
if 'thresholds' not in stored_variables:
    thresholds = {marker: input_widget.value for marker, input_widget in threshold_inputs.items()}
    stored_variables['thresholds'] = thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
# Save button callback: persist thresholds to stored_variables.json
def save_thresholds(event):
    """Collect the current value of every marker's FloatInput, store them
    under 'thresholds' in stored_variables, rewrite the JSON file, and
    notify the user."""
    current = {}
    for marker_name, widget in threshold_inputs.items():
        current[marker_name] = widget.value
    stored_variables['thresholds'] = current
    with open(stored_variables_path, 'w') as fh:
        json.dump(stored_variables, fh, indent=4)
    pn.state.notifications.success('Thresholds saved successfully!')
save_button2 = pn.widgets.Button(name='Save Thresholds', button_type='primary')
save_button2.on_click(save_thresholds)
# Lay the threshold widgets out in a GridSpec, five per row.
grid = pn.GridSpec()
row = 0
col = 0
for marker in stored_variables['markers']:
    grid[row, col] = threshold_inputs[marker]
    col += 1
    if col == 5:
        col = 0
        row += 1
# Save button on its own row, spanning the full width.
grid[row + 1, :5] = save_button2
# Panel layout shown to the user
threshold_panel = pn.Column(
    pn.pane.Markdown("## Define Thresholds for Markers"),
    grid)
import pandas as pd
import json
# Load stored variables from the JSON file
with open(stored_variables_path, 'r') as file:
    stored_variables = json.load(file)
# Step 1: candidate columns for classification. Every column of df is scanned;
# non-intensity columns are effectively skipped inside assign_cell_type because
# their name prefix is not a key of stored_variables['thresholds'].
intensities = list(df.columns)
def assign_cell_type(row):
    """Classify one cell (DataFrame row) into a cell type.

    Scans the intensity columns in order; the first marker whose value
    exceeds its stored threshold AND appears in a cell-type marker list
    decides the type. Falls back to 'STROMA' when nothing matches.
    """
    thresholds = stored_variables['thresholds']
    type_to_markers = stored_variables['cell_type_classification']
    for column in intensities:
        marker_name = column.split('_')[0]  # marker is the prefix of the column name
        if marker_name not in thresholds:
            continue
        if row[column] <= thresholds[marker_name]:
            continue
        for candidate_type, marker_list in type_to_markers.items():
            if marker_name in marker_list:
                return candidate_type
    return 'STROMA'  # Default if no condition matches
# Step 5: Apply the classification function row-wise to the DataFrame
df['cell_type'] = df.apply(lambda row: assign_cell_type(row), axis=1)
df.head()
# Sanity check: count how many rows were assigned to each cell type
present_stroma = df['cell_type'].str.contains('STROMA').sum()
present_cancer = df['cell_type'].str.contains('CANCER').sum()
present_immune = df['cell_type'].str.contains('IMMUNE').sum()
present_endothelial = df['cell_type'].str.contains('ENDOTHELIAL').sum()
df.head(30)
df
# ## IV.8. *HEATMAPS
# Normalize a duplicated-column artifact ('Sample_ID.1') from earlier merges.
if 'Sample_ID.1' in df.columns:
    df = df.rename(columns={'Sample_ID.1': 'Sample_ID'})
# NOTE(review): this `open` never reads `file` — ls_samples comes from the
# stored_vars dict loaded at the top of the script, so the open is redundant;
# confirm whether a fresh json.load(file) was intended here.
with open(stored_variables_path, 'r') as file:
    ls_samples = stored_vars['ls_samples']
keep = ls_samples
keep_cell_type = ['ENDOTHELIAL','CANCER', 'STROMA', 'IMMUNE']
# Subset to the kept samples/cell types, then take 20,000 random rows
# (np.random.choice defaults to sampling WITH replacement) for plotting speed.
test2_df = df.loc[(df['cell_type'].isin(keep_cell_type))
                  & (df['Sample_ID'].isin(keep)), :].copy()
random_rows = np.random.choice(len(test2_df),20000)
df2 = test2_df.iloc[random_rows,:].copy()
df2
# ### COLORS
# #### SAMPLES COLORS
# One distinct husl palette color per sample.
color_values = sb.color_palette("husl",n_colors = len(ls_samples))
sb.palplot(sb.color_palette(color_values))
# Samples whose name contains 'TMA' (presumably tissue-microarray controls —
# TODO confirm) get gray shades instead.
TMA_samples = [s for s in df.Sample_ID.unique() if 'TMA' in s]
TMA_color_values = sb.color_palette(n_colors = len(TMA_samples),palette = "gray")
sb.palplot(sb.color_palette(TMA_color_values))
# Store in a dictionary keyed by Sample_ID
color_dict = dict()
color_dict = dict(zip(df.Sample_ID.unique(), color_values))
# Replace all TMA samples' colors with gray (i advances only on TMA hits)
i = 0
for key in color_dict.keys():
    if 'TMA' in key:
        color_dict[key] = TMA_color_values[i]
        i +=1
color_dict
color_df_sample = color_dict_to_df(color_dict, "Sample_ID")  # helper from my_modules
# Save to file in metadata directory
filename = "sample_color_data.csv"
filename = os.path.join(metadata_dir, filename)
color_df_sample.to_csv(filename, index = False)
color_df_sample
| # Legend of sample info only | |
| g = plt.figure(figsize = (1,1)).add_subplot(111) | |
| g.axis('off') | |
| handles = [] | |
| for item in color_dict.keys(): | |
| h = g.bar(0,0, color = color_dict[item], | |
| label = item, linewidth =0) | |
| handles.append(h) | |
| first_legend = plt.legend(handles=handles, loc='upper right', title = 'Sample') | |
| filename = "Sample_legend.png" | |
| filename = os.path.join(metadata_images_dir, filename) | |
| plt.savefig(filename, bbox_inches = 'tight') | |
# Re-load the sample colors we just saved.
# NOTE(review): `df` is rebound to the small color table here — the merged
# cell-data frame is no longer reachable through `df` until `df = df2` later.
filename = "sample_color_data.csv"
filename = os.path.join(metadata_dir, filename)
# Open, read in information
df = pd.read_csv(filename, header = 0)
df = df.drop(columns = ['hex'])
# our tuple of float values for rgb, (r, g, b) was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)
# Turn into dictionary: Sample_ID -> (r, g, b)
sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()
# #### CELL TYPES COLORS
# Fixed colors per cell type (RGB floats in [0, 1]).
custom_colors = {
    'CANCER': (0.1333, 0.5451, 0.1333),
    'STROMA': (0.4, 0.4, 0.4),
    'IMMUNE': (1, 1, 0),
    'ENDOTHELIAL': (0.502, 0, 0.502)
}
# Retrieve the list of cell types and their corresponding colors
cell_types = list(custom_colors.keys())
color_values = [custom_colors[cell] for cell in cell_types]
# Display the colors
sb.palplot(sb.color_palette(color_values))
# Store in a dictionary: cell_type -> (r, g, b)
celltype_color_dict = dict(zip(cell_types, color_values))
celltype_color_dict
# Save color information (mapping and legend) to metadata directory
celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
celltype_color_df.head()
# Save to file in metadata directory
filename = "celltype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
celltype_color_df.to_csv(filename, index = False)
# Legend of cell type info only: the figure contains just a legend, no data.
g = plt.figure(figsize = (1,1)).add_subplot(111)
g.axis('off')
handles = []
for item in celltype_color_dict.keys():
    # Invisible zero-height bars exist solely to generate legend handles.
    h = g.bar(0,0, color = celltype_color_dict[item],
              label = item, linewidth =0)
    handles.append(h)
# Fixed: a stray trailing comma previously made first_legend a 1-tuple
# instead of the Legend object (inconsistent with the sample legend above).
first_legend = plt.legend(handles=handles, loc='upper right', title = 'Cell type')
filename = "Celltype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches = 'tight')
# Re-load the cell-type colors from disk, rebuilding float tuples.
filename = "celltype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
# Open, read in information
df = pd.read_csv(filename, header = 0)
df = df.drop(columns = ['hex'])
# our tuple of float values for rgb, (r, g, b) was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)
# Turn into dictionary: cell_type -> (r, g, b)
cell_type_color_dict = df.set_index('cell_type')['rgb'].to_dict()
# Per-row color Series for heatmap row annotations (aligned to df2's rows)
sample_row_colors =df2.Sample_ID.map(sample_color_dict)
cell_type_row_colors = df2.cell_type.map(cell_type_color_dict)
| # ## Cell Subtype Colours | |
| import pandas as pd | |
| import os | |
def rgb_tuple_from_str(rgb_str):
    """Parse a stringified color like "(r, g, b)" back into a tuple of floats.

    Values serialized as "np.float64(...)" are handled by stripping that
    wrapper text before conversion. Returns None when the cleaned string
    cannot be parsed into floats.
    """
    cleaned = rgb_str
    for junk in ("(", ")", " ", "np.float64"):
        cleaned = cleaned.replace(junk, "")
    try:
        return tuple(float(part) for part in cleaned.split(","))
    except ValueError:
        # Unparseable input: signal failure with None rather than raising.
        return None
# Load the cell-subtype colors from disk, rebuilding float tuples.
filename = "cellsubtype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])
# Clean the 'rgb' column to remove unexpected "np.float64" wrapper strings
df['rgb'] = df['rgb'].str.replace("np.float64", "", regex=False)
# Apply the function to convert string to tuple of floats
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)
# Turn into dictionary: cell_subtype -> (r, g, b)
cell_subtype_color_dict = df.set_index('cell_subtype')['rgb'].to_dict()
df2
# Per-row color Series for heatmap row annotations
sample_row_colors =df2.Sample_ID.map(sample_color_dict)
cell_subtype_row_colors = df2.cell_subtype.map(cell_subtype_color_dict)
# #### Cell Type
df
# NOTE(review): at this point `df` is still the color table loaded above, so
# these are its columns, not the cell-data columns — and the line is repeated.
selected_intensities = list(df.columns)
selected_intensities = list(df.columns)
df
df2
# Rebind df to the sampled cell-data subset: everything below works on df2's rows.
df = df2
df
import json
import pandas as pd
import numpy as np
import panel as pn
import plotly.graph_objects as go
pn.extension('plotly')  # enable Plotly panes inside Panel
# Load the sample list from the stored-variables JSON file
with open(stored_variables_path, 'r') as f:
    json_data = json.load(f)
ls_samples = json_data["ls_samples"]
# Checkbox group to select files
checkbox_group = pn.widgets.CheckBoxGroup(name='Select Files', options=ls_samples)
# Initially empty dropdowns for X and Y axis selection (populated once files are picked)
x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])
# Input field for the number of random samples to draw for the dot plot
random_sample_input = pn.widgets.IntInput(name='Number of Random Samples', value=20000, step=100)
# Sliders for interactive X and Y threshold lines (ranges rescaled on selection)
x_line_slider = pn.widgets.FloatSlider(name='X Axis Line Position', start=0, end=1, step=0.01)
y_line_slider = pn.widgets.FloatSlider(name='Y Axis Line Position', start=0, end=1, step=0.01)
# Placeholder for the dot plot
plot_placeholder = pn.pane.Plotly()
# Placeholder for the digital reconstruction plot
reconstruction_placeholder = pn.pane.Plotly()
# Function to create the dot plot
def create_dot_plot(selected_files, x_axis, y_axis, n_samples, x_line_pos, y_line_pos):
    """Scatter x_axis against y_axis for the selected samples, overlaying
    dashed red threshold lines at x_line_pos / y_line_pos.

    Returns an empty Figure when no files are selected or the requested
    axes are not present in the data.
    """
    if not selected_files:
        return go.Figure()
    subset = df.loc[df['Sample_ID'].isin(selected_files), :].copy()
    # Down-sample (np.random.choice: with replacement) when the subset is large.
    if len(subset) > n_samples:
        picked = np.random.choice(len(subset), n_samples)
        plot_df = subset.iloc[picked, :].copy()
    else:
        plot_df = subset
    if x_axis not in plot_df.columns or y_axis not in plot_df.columns:
        return go.Figure()
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=plot_df[x_axis],
        y=plot_df[y_axis],
        mode='markers',
        marker=dict(color='LightSkyBlue', size=2)
    ))
    # Interactive threshold guides.
    fig.add_vline(x=x_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.add_hline(y=y_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.update_layout(
        title='Threshold',
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(title=x_axis, linecolor='black', range=[plot_df[x_axis].min(), plot_df[x_axis].max()]),
        yaxis=dict(title=y_axis, linecolor='black', range=[plot_df[y_axis].min(), plot_df[y_axis].max()])
    )
    return fig
def assign_cell_types_again():
    """Re-run cell-type classification on the module-level df.

    Re-reads thresholds and the cell_type_classification mapping from
    stored_variables.json (so fresh widget edits are picked up), rewrites
    df['cell_type'] in place, and returns df.
    """
    with open(stored_variables_path, 'r') as file:
        stored_variables = json.load(file)
    intensities = list(df.columns)
    def assign_cell_type(row):
        # First marker (in column order) whose value exceeds its threshold
        # and appears in a cell-type marker list decides the type.
        for intensity in intensities:
            marker = intensity.split('_')[0]  # Extract marker from intensity name
            if marker in stored_variables['thresholds']:
                threshold = stored_variables['thresholds'][marker]
                if row[intensity] > threshold:
                    for cell_type, markers in stored_variables['cell_type_classification'].items():
                        if marker in markers:
                            return cell_type
        return 'STROMA'  # Default if no condition matches
    df['cell_type'] = df.apply(lambda row: assign_cell_type(row), axis=1)
    return df
# Function to create the digital reconstruction plot
def create_reconstruction_plot(selected_files):
    """Plot each selected sample's cells at their nuclear XY positions,
    colored by their (freshly re-assigned) cell type.

    NOTE(review): the title and the axis ranges come from the loop variables
    left over from the LAST sample in selected_files — confirm whether
    per-sample figures or a combined range was intended.
    """
    if not selected_files:
        return go.Figure()
    df = assign_cell_types_again()  # refresh cell_type using current thresholds
    fig = go.Figure()
    for sample in selected_files:
        sample_id = sample
        sample_id2 = sample.split('_')[0]  # short sample label for the title
        location_colors = df.loc[df['Sample_ID'] == sample_id, ['Nuc_X', 'Nuc_Y_Inv', 'cell_type']]
        title = sample_id2 + " Background Subtracted XY Map cell types"
        # One trace per cell type so the legend lists each type separately.
        for celltype in df.loc[df['Sample_ID'] == sample_id, 'cell_type'].unique():
            fig.add_scatter(
                mode='markers',
                marker=dict(size=3, opacity=0.5, color='rgb' + str(cell_type_color_dict[celltype])),
                x=location_colors.loc[location_colors['cell_type'] == celltype, 'Nuc_X'],
                y=location_colors.loc[location_colors['cell_type'] == celltype, 'Nuc_Y_Inv'],
                name=celltype
            )
    fig.update_layout(
        title=title,
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        legend=dict(
            title='Cell Types',
            font=dict(
                family='Arial',
                size=12,
                color='black'
            ),
            bgcolor='white',
            bordercolor='black',
            borderwidth=0.4,
            itemsizing='constant'
        ),
        xaxis=dict(title='Nuc_X', linecolor='black', range=[location_colors['Nuc_X'].min(), location_colors['Nuc_X'].max()]),
        yaxis=dict(title='Nuc_Y_Inv', linecolor='black', range=[location_colors['Nuc_Y_Inv'].min(), location_colors['Nuc_Y_Inv'].max()])
    )
    return fig
def update_dropdown_options(event):
    """Refresh the X/Y axis dropdowns with the '_Intensity_Average' columns
    of the currently selected samples; clear them when nothing is selected."""
    chosen = checkbox_group.value
    if not chosen:
        x_axis_dropdown.options = []
        y_axis_dropdown.options = []
        return
    subset = df.loc[df['Sample_ID'].isin(chosen), :].copy()
    intensity_cols = [col for col in subset.columns if '_Intensity_Average' in col]
    x_axis_dropdown.options = intensity_cols
    y_axis_dropdown.options = intensity_cols
def update_slider_ranges(event):
    """Re-center the threshold sliders symmetrically around zero, scaled to
    the maximum of the currently selected data on each axis, and reset both
    slider values to 0."""
    chosen = checkbox_group.value
    x_axis = x_axis_dropdown.value
    y_axis = y_axis_dropdown.value
    if not (chosen and x_axis and y_axis):
        return
    subset = df.loc[df['Sample_ID'].isin(chosen), :].copy()
    x_extent = abs(subset[x_axis].max())
    y_extent = abs(subset[y_axis].max())
    x_line_slider.start = -x_extent
    x_line_slider.end = x_extent
    y_line_slider.start = -y_extent
    y_line_slider.end = y_extent
    x_line_slider.value = 0
    y_line_slider.value = 0
def on_value_change(event):
    """Rebuild both figures from the current widget state, then push them
    into their Plotly pane placeholders."""
    selected = checkbox_group.value
    dot_fig = create_dot_plot(
        selected,
        x_axis_dropdown.value,
        y_axis_dropdown.value,
        random_sample_input.value,
        x_line_slider.value,
        y_line_slider.value,
    )
    recon_fig = create_reconstruction_plot(selected)
    plot_placeholder.object = dot_fig
    reconstruction_placeholder.object = recon_fig
# Link widget value changes to the update callbacks
checkbox_group.param.watch(update_dropdown_options, 'value')
checkbox_group.param.watch(update_slider_ranges, 'value')
x_axis_dropdown.param.watch(update_slider_ranges, 'value')
y_axis_dropdown.param.watch(update_slider_ranges, 'value')
x_axis_dropdown.param.watch(on_value_change, 'value')
y_axis_dropdown.param.watch(on_value_change, 'value')
random_sample_input.param.watch(on_value_change, 'value')
x_line_slider.param.watch(on_value_change, 'value')
y_line_slider.param.watch(on_value_change, 'value')
# Layout: controls on top, dot plot and reconstruction side by side below
plot_with_reconstruction = pn.Column(
    "## Select Files to Construct Dot Plot",
    checkbox_group,
    x_axis_dropdown,
    y_axis_dropdown,
    random_sample_input,
    pn.Row(x_line_slider, y_line_slider),
    pn.Row(
        pn.Column(
            "## Dot Plot",
            pn.Column(plot_placeholder)),
        pn.Column(
            "## Digital Reconstruction Plot",
            reconstruction_placeholder),
    ))
# Serve the app
#plot_with_reconstruction.show()
# ## MAKE HEATMAPS
# ### Cell Subtype
# Data structure holding row/column annotation specs for the heatmaps.
## IMPORTANT - if you use 'annotations', it MUST have both 'rows' and 'cols'
## objects inside. These can be empty lists, but they must be there!
anns = {}
# Each row annotation is a dict carrying its label, the per-row color mapping
# (a Series aligned to the data rows), the color dictionary (for the legend),
# and the legend placement. Items are appended one at a time so their order
# on the figure is as anticipated.
row_annotations = []
row_annotations.append({'label':'Sample',
                        'type':'row',
                        'mapping':sample_row_colors,
                        'dict':sample_color_dict,
                        'location':'center left',
                        'bbox_to_anchor':(0.1, 0.9)})
row_annotations.append({'label':'Cell type',
                        'type':'row',
                        'mapping':cell_type_row_colors,
                        'dict':cell_type_color_dict,
                        'location':'center left',
                        'bbox_to_anchor':(0.17, 0.9)})
anns['rows'] = row_annotations
# No column annotations for these heatmaps (but the key must exist)
col_annotations = []
anns['cols'] = col_annotations
# Short marker labels for figure display: keep only the part before '_'
figure_marker_names = {key: value.split('_')[0] for key, value in full_to_short_names.items()}
| not_intensities | |
| df2 | |
| df2.drop('cell_subtype', axis = 'columns') | |
| not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size', | |
| 'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID','cell_type', 'cell_subtype', 'cluster','ID', | |
| 'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)'] | |
| df2 = assign_cell_types_again() | |
| df2.drop('cell_subtype', axis = 'columns') | |
| df2.head() | |
| # Save one heatmap | |
| data = df | |
| data | |
| #print(data.columns) | |
| # Selecting a subset of rows from df based on the 'Sample_ID' column | |
| # and then random>ly choosing 50,000 rows from that subset to create the DataFrame test_df | |
# Re-read stored_variables.json so sample lists saved by the widgets above are
# honoured here.
# BUG FIX: the file handle was opened but never read — ls_samples was taken
# from the stale module-level stored_vars loaded at the top of the script.
with open(stored_variables_path, 'r') as file:
    stored_vars = json.load(file)
ls_samples = stored_vars['ls_samples']
keep = list(ls_samples)
# Only these four top-level cell types feed the heatmap sample.
keep_cell_type = ['STROMA','CANCER','IMMUNE','ENDOTHELIAL']
# Check the individual conditions
cell_type_condition = data['cell_type'].isin(keep_cell_type)
sample_id_condition = data['Sample_ID'].isin(keep)
#print("Cell type condition:")
#print(cell_type_condition.head())
#print("Sample ID condition:")
#print(sample_id_condition.head())
# Combine the conditions
combined_condition = cell_type_condition & sample_id_condition
#print("Combined condition:")
#print(combined_condition.head())
# Apply the combined condition to filter the DataFrame
test2_df = data.loc[combined_condition].copy()
#print("Filtered DataFrame:")
#print(test2_df.head())
#test2_df = data.loc[data['cell_type'].isin(keep_cell_type) & data['Sample_ID'].isin(keep)].copy()
#print("Test2_df",test2_df.head())
#print(len(test2_df))
#random_rows = np.random.choice(len(test2_df),len(test2_df))
# NOTE(review): np.random.choice samples WITH replacement by default, so
# test_df may contain duplicate rows, and this raises if test2_df is empty
# — confirm this is intended.
random_rows = np.random.choice(len(test2_df),1000)
test_df = test2_df.iloc[random_rows,:].copy()
#print(len(test_df))
test_df
# Re-imports are redundant in a flat script (already imported above) but harmless.
import json
import panel as pn
import param
import pandas as pd
# Initialize Panel extension
pn.extension('tabulator')
# Path to the stored variables file
file_path = stored_variables_path
# Load existing data from stored_variables.json with error handling
def load_data(path=None):
    """Load the stored-variables JSON document.

    Parameters
    ----------
    path : str or None
        File to read; defaults to the module-level ``file_path``
        (stored_variables.json). The parameter is new and optional, so
        existing ``load_data()`` callers are unaffected.

    Returns
    -------
    dict
        Parsed JSON contents, or an empty dict when the file is missing or
        contains invalid JSON (the error is printed rather than raised).
    """
    if path is None:
        path = file_path
    try:
        with open(path, 'r') as file:
            return json.load(file)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        # FileNotFoundError added for robustness, matching the
        # try/except FileNotFoundError pattern used later in this script.
        print(f"Error reading JSON file: {e}")
        return {}
data = load_data()
# Define markers, cell types, and cell subtypes from the loaded data
# (each falls back to an empty list when the key is absent).
markers = data.get('markers', [])
cell_types = data.get('cell_type', [])
cell_subtypes = data.get('cell_subtype', [])
# Sanitize option names
def sanitize_options(options):
    """Return *options* with characters that break widget/param names rewritten:
    spaces -> '_', '+' -> 'plus', 'α' -> 'a', apostrophes removed."""
    rules = ((' ', '_'), ('+', 'plus'), ('α', 'a'), ("'", ''))
    cleaned = []
    for name in options:
        for old, new in rules:
            name = name.replace(old, new)
        cleaned.append(name)
    return cleaned
# Sanitized copies are used as widget-safe row labels in the tables below.
sanitized_cell_types = sanitize_options(cell_types)
sanitized_cell_subtypes = sanitize_options(cell_subtypes)
# Helper function to create a Parameterized class and DataFrame
def create_classification_df(items, item_label):
    """Build a DataFrame with one row per entry in *items* and one boolean
    column per module-level marker, all initialised to False.

    A param.Parameterized subclass is generated on the fly (one String field
    named *item_label* plus one Boolean per marker) so the frame mirrors a
    widget-backed schema; *item_label* is returned as the first column.
    """
    # Dynamically declare the Parameterized class.
    attrs = {item_label: param.String()}
    attrs.update({m: param.Boolean(default=False) for m in markers})
    Classification = type(f'{item_label}Classification', (param.Parameterized,), attrs)
    # One instance per item, every marker switched off.
    instances = []
    for entry in items:
        kwargs = dict.fromkeys(markers, False)
        kwargs[item_label] = entry
        instances.append(Classification(**kwargs))
    frame = pd.DataFrame([inst.param.values() for inst in instances])
    # Label column first, then markers in their declared order.
    return frame[[item_label] + markers]
# Create DataFrames for cell types and cell subtypes
cell_type_df = create_classification_df(sanitized_cell_types, 'CELL_TYPE')
cell_subtype_df = create_classification_df(sanitized_cell_subtypes, 'CELL_SUBTYPE')
# Define formatters for Tabulator widgets
# tickCross renders each boolean marker column as a clickable check/cross cell.
tabulator_formatters = {marker: {'type': 'tickCross'} for marker in markers}
# Create Tabulator widgets
cell_type_table = pn.widgets.Tabulator(cell_type_df, formatters=tabulator_formatters)
cell_subtype_table = pn.widgets.Tabulator(cell_subtype_df, formatters=tabulator_formatters)
# Save functions for cell types and cell subtypes
def save_data(table, classification_key, item_label):
    """Persist marker selections from a Tabulator *table* into stored_variables.json.

    Parameters
    ----------
    table : pn.widgets.Tabulator
        Table whose ``value`` DataFrame has one row per item and one
        boolean (or tick/cross string) column per module-level marker.
    classification_key : str
        JSON key the mapping is stored under (e.g. 'cell_type_classification').
    item_label : str
        Column holding the item names (e.g. 'CELL_TYPE').
    """
    current_data = table.value
    # Tabulator's tickCross formatter may return '✔'/'✘' strings; coerce them
    # to real booleans before building the mapping.
    df_bool = current_data.replace({'✔': True, '✘': False})
    classification = {}
    for _, row in df_bool.iterrows():
        # Keep only the markers ticked on this row.
        classification[row[item_label]] = [marker for marker in markers if row[marker]]
    data[classification_key] = classification
    try:
        with open(file_path, 'w') as file:
            json.dump(data, file, indent=4)
    except IOError as e:
        # Restored from the previously commented-out handler: a failed write
        # should be reported, not crash the Panel button callback.
        print(f"Error writing JSON file: {e}")
# Button actions
def save_cell_type_selections(event):
    """Button callback: persist the cell-type marker table to the JSON store."""
    save_data(cell_type_table, 'cell_type_classification', 'CELL_TYPE')
def save_cell_subtype_selections(event):
    """Button callback: persist the cell-subtype marker table to the JSON store."""
    save_data(cell_subtype_table, 'cell_subtype_classification', 'CELL_SUBTYPE')
# Create save buttons
save_cell_type_button = pn.widgets.Button(name='Save Cell Type Selections', button_type='primary')
save_cell_type_button.on_click(save_cell_type_selections)
save_cell_subtype_button = pn.widgets.Button(name='Save Cell Subtype Selections', button_type='primary')
save_cell_subtype_button.on_click(save_cell_subtype_selections)
# Table + save button stacked for each classification app pane.
cell_type_classification_app_main = pn.Column(
    pn.pane.Markdown("# Cell Type Classification"),
    cell_type_table,
    save_cell_type_button
)
cell_subtype_classification_app_main = pn.Column(
    pn.pane.Markdown("# Cell Subtype Classification"),
    cell_subtype_table,
    save_cell_subtype_button
)
#cell_subtype_classification_app_main.show()
import json
import panel as pn
# Load existing stored variables
with open(stored_variables_path, 'r') as f:
    stored_variables = json.load(f)
# Initialize a dictionary to hold threshold inputs
subtype_threshold_inputs = {}
# Create widgets for each marker to get threshold inputs from the user
for marker in stored_variables['markers']:
    subtype_threshold_inputs[marker] = pn.widgets.FloatInput(name=f'{marker} Threshold', value=0.0, step=0.1)
# Reload defensively in case the file disappeared since the read above.
try:
    with open(stored_variables_path, 'r') as f:
        stored_variables = json.load(f)
except FileNotFoundError:
    stored_variables = {}
# Check if 'thresholds' field is present, if not, add it
# (seeds the file with the widgets' initial 0.0 values on first run).
if 'subtype_thresholds' not in stored_variables:
    subtype_thresholds = {marker: input_widget.value for marker, input_widget in subtype_threshold_inputs.items()}
    stored_variables['subtype_thresholds'] = subtype_thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
# Save button to save thresholds to stored_variables.json
def save_thresholds(event):
    """Button callback: capture every marker's FloatInput value and write the
    updated ``subtype_thresholds`` mapping back to stored_variables.json."""
    current = {}
    for marker, widget in subtype_threshold_inputs.items():
        current[marker] = widget.value
    stored_variables['subtype_thresholds'] = current
    with open(stored_variables_path, 'w') as fh:
        json.dump(stored_variables, fh, indent=4)
save_button = pn.widgets.Button(name='Save Thresholds', button_type='primary')
save_button.on_click(save_thresholds)
# Create a GridSpec layout
subtype_grid = pn.GridSpec()
# Add the widgets to the grid with five per row
row = 0
col = 0
for marker in stored_variables['markers']:
    subtype_grid[row, col] = subtype_threshold_inputs[marker]
    col += 1
    if col == 5:
        col = 0
        row += 1
# Add the save button at the end, spanning across all columns of the new row
subtype_grid[row + 1, :5] = save_button
# Panel layout
subtype_threshold_panel = pn.Column(
    pn.pane.Markdown("## Define Thresholds for Markers"),
    subtype_grid)
# Display the panel
#subtype_threshold_panel.show()
# Fresh read so any thresholds just saved are visible below.
with open(stored_variables_path, 'r') as file:
    stored_variables = json.load(file)
# All df columns; filtered per-marker inside assign_cell_subtypes.
intensities = list(df.columns)
def assign_cell_subtypes(row):
    """Return the first cell subtype whose marker intensity on *row* exceeds
    its stored threshold; fall back to 'DC' when nothing qualifies.

    Scans the module-level ``intensities`` columns in order; a column's
    marker name is its prefix before the first '_'. Thresholds and the
    subtype->markers mapping come from the module-level ``stored_variables``.
    """
    thresholds = stored_variables['subtype_thresholds']
    classification = stored_variables['cell_subtype_classification']
    for column in intensities:
        marker = column.split('_', 1)[0]  # marker prefix of the column name
        if marker not in thresholds:
            continue
        if row[column] > thresholds[marker]:
            # First subtype whose marker list contains this marker wins.
            for subtype, subtype_markers in classification.items():
                if marker in subtype_markers:
                    return subtype
    return 'DC'
# Rebuild cell types, then derive a subtype for every row.
df = assign_cell_types_again()
df['cell_subtype'] = df.apply(lambda row: assign_cell_subtypes(row), axis=1)
# Leftover notebook-cell displays; no effect in a script.
df
data
# Define a color dictionary
# NOTE(review): DC/Stroma and B/Endothelial share identical colour values.
cell_subtype_color_dict = {
    'DC': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
    'B': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
    'TCD4': (0.6980392156862745, 0.8745098039215686, 0.5411764705882353),
    'Exhausted TCD4': (0.2, 0.6274509803921569, 0.17254901960784313),
    'Exhausted TCD8': (0.984313725490196, 0.6039215686274509, 0.6),
    'TCD8': (0.8901960784313725, 0.10196078431372549, 0.10980392156862745),
    'M1': (0.9921568627450981, 0.7490196078431373, 0.43529411764705883),
    'M2': (1.0, 0.4980392156862745, 0.0),
    'Treg': (0.792156862745098, 0.6980392156862745, 0.8392156862745098),
    'Other CD45+': (0.41568627450980394, 0.23921568627450981, 0.6039215686274509),
    'Cancer': (1.0, 1.0, 0.6),
    'myCAF αSMA+': (0.6941176470588235, 0.34901960784313724, 0.1568627450980392),
    'Stroma': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
    'Endothelial': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765)
}
# Add the 'rgb' prefix to the colors
# ('rgb(r, g, b)' strings are the form plotly accepts for marker colours).
cell_subtype_color_dict = {k: f"rgb{v}" for k, v in cell_subtype_color_dict.items()}
# Load stored variables from JSON file
def load_stored_variables(path):
    """Parse the JSON document at *path* and return it as a dict."""
    with open(path, 'r') as handle:
        contents = json.load(handle)
    return contents
# Get subtype intensities columns
# (only the averaged-intensity measurements drive subtype assignment).
subtype_intensities = [col for col in df.columns if '_Intensity_Average' in col]
# Assign cell subtype based on thresholds and classifications
def assign_cell_subtype(row):
    """Row-wise classifier: return the first cell subtype whose marker
    intensity on *row* exceeds its stored threshold, else 'DC'.

    NOTE(review): this re-reads stored_variables.json on EVERY row when used
    via df.apply — one disk read per cell. Presumably done so freshly saved
    thresholds are always honoured; consider loading once per apply() call.
    """
    #print("new_row")
    stored_variables = load_stored_variables(stored_variables_path)
    for subtype_intensity in subtype_intensities:
        # Marker name is the prefix before the first '_' in the column name.
        marker = subtype_intensity.split('_')[0]
        if marker in stored_variables['subtype_thresholds']:
            subtype_threshold = stored_variables['subtype_thresholds'][marker]
            if row[subtype_intensity] > subtype_threshold:
                # First subtype whose marker list contains this marker wins.
                for cell_subtype, markers in stored_variables['cell_subtype_classification'].items():
                    #print(cell_subtype,marker,markers)
                    if marker in markers:
                        #print("Markers:",marker)
                        return cell_subtype # Return the assigned subtype
    return 'DC' # Default value if no conditions match
# Main function to assign cell subtypes to DataFrame
def assign_cell_subtypes_again():
    """Recompute the 'cell_subtype' column of the MODULE-LEVEL ``df`` in place
    (one file read per row — see assign_cell_subtype) and return df."""
    df['cell_subtype'] = df.apply(lambda row: assign_cell_subtype(row), axis=1)
    return df
# Redundant re-imports (flat notebook export); harmless.
import json
import pandas as pd
import numpy as np
import panel as pn
import plotly.graph_objects as go
pn.extension('plotly')
# Load the selected intensities from the JSON file
with open(stored_variables_path, 'r') as f:
    json_data = json.load(f)
subtype_ls_samples = json_data["ls_samples"]
#print(f"Loaded sample files: {subtype_ls_samples}")
# Checkbox group to select files
subtype_checkbox_group = pn.widgets.CheckBoxGroup(name='Select Files', options=subtype_ls_samples)
# Initially empty dropdowns for X and Y axis selection
# (populated by update_subtype_dropdown_options once files are chosen).
subtype_x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
subtype_y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])
# Input field for the number of random samples
subtype_random_sample_input = pn.widgets.IntInput(name='Number of Random Samples', value=20000, step=100)
# Sliders for interactive X and Y lines (ranges rescaled per selection).
subtype_x_line_slider = pn.widgets.FloatSlider(name='X Axis Line Position', start=0, end=1, step=0.01)
subtype_y_line_slider = pn.widgets.FloatSlider(name='Y Axis Line Position', start=0, end=1, step=0.01)
# Placeholder for the dot plot
subtype_plot_placeholder = pn.pane.Plotly()
# Placeholder for the digital reconstruction plot
subtype_reconstruction_placeholder = pn.pane.Plotly()
def update_color_dict():
    """Return the cell-subtype -> plotly colour mapping.

    Colours are float RGB triples rendered as 'rgb(r, g, b)' strings.
    DC/Stroma and B/Endothelial share the same colour values, matching
    the module-level dictionary defined earlier.
    """
    palette = [
        ('DC', (0.6509803921568628, 0.807843137254902, 0.8901960784313725)),
        ('B', (0.12156862745098039, 0.47058823529411764, 0.7058823529411765)),
        ('TCD4', (0.6980392156862745, 0.8745098039215686, 0.5411764705882353)),
        ('Exhausted TCD4', (0.2, 0.6274509803921569, 0.17254901960784313)),
        ('Exhausted TCD8', (0.984313725490196, 0.6039215686274509, 0.6)),
        ('TCD8', (0.8901960784313725, 0.10196078431372549, 0.10980392156862745)),
        ('M1', (0.9921568627450981, 0.7490196078431373, 0.43529411764705883)),
        ('M2', (1.0, 0.4980392156862745, 0.0)),
        ('Treg', (0.792156862745098, 0.6980392156862745, 0.8392156862745098)),
        ('Other CD45+', (0.41568627450980394, 0.23921568627450981, 0.6039215686274509)),
        ('Cancer', (1.0, 1.0, 0.6)),
        ('myCAF αSMA+', (0.6941176470588235, 0.34901960784313724, 0.1568627450980392)),
        ('Stroma', (0.6509803921568628, 0.807843137254902, 0.8901960784313725)),
        ('Endothelial', (0.12156862745098039, 0.47058823529411764, 0.7058823529411765)),
    ]
    # f"rgb{tuple}" yields the 'rgb(r, g, b)' string form plotly accepts.
    return {name: f"rgb{rgb}" for name, rgb in palette}
# Function to create the dot plot
def create_subtype_dot_plot(subtype_selected_files, subtype_x_axis, subtype_y_axis, subtype_n_samples, subtype_x_line_pos, subtype_y_line_pos):
    """Scatter two intensity columns of the selected samples with red dashed
    guide lines at the candidate thresholds.

    Returns an empty figure when no files are selected or either axis is not
    a column of the module-level ``df``.
    """
    if not subtype_selected_files:
        return go.Figure()
    # Restrict to the chosen Sample_IDs.
    subset = df.loc[df['Sample_ID'].isin(subtype_selected_files), :].copy()
    # Downsample for responsiveness. Sampling is with replacement
    # (np.random.choice default), matching the original behaviour.
    if len(subset) > subtype_n_samples:
        picked = np.random.choice(len(subset), subtype_n_samples)
        subset = subset.iloc[picked, :].copy()
    if subtype_x_axis not in subset.columns or subtype_y_axis not in subset.columns:
        return go.Figure()
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=subset[subtype_x_axis],
        y=subset[subtype_y_axis],
        mode='markers',
        marker=dict(color='LightSkyBlue', size=2)
    ))
    # Threshold guide lines.
    fig.add_vline(x=subtype_x_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.add_hline(y=subtype_y_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.update_layout(
        title='Threshold',
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(title=subtype_x_axis, linecolor='black',
                   range=[subset[subtype_x_axis].min(), subset[subtype_x_axis].max()]),
        yaxis=dict(title=subtype_y_axis, linecolor='black',
                   range=[subset[subtype_y_axis].min(), subset[subtype_y_axis].max()]),
    )
    return fig
def create_subtype_reconstruction_plot(subtype_selected_files):
    """Build an XY scatter ('digital reconstruction') of nuclei for each
    selected sample, coloured by assigned cell subtype.

    NOTE(review): reassigns subtypes for the WHOLE df on every call (one file
    read per row — see assign_cell_subtype), and the title/layout are
    overwritten each loop iteration, so the figure title reflects the LAST
    sample in the selection — confirm that is intended for multi-selections.
    """
    cell_subtype_color_dict = update_color_dict()
    # print(subtype_selected_files)
    if not subtype_selected_files:
        # print("No files selected.")
        return go.Figure()
    df = assign_cell_subtypes_again()
    subtype_fig = go.Figure()
    for sample in subtype_selected_files:
        sample_id = sample
        sample_id2 = sample.split('_')[0]
        location_colors = df.loc[df['Sample_ID'] == sample_id, ['Nuc_X', 'Nuc_Y_Inv', 'cell_subtype']]
        # print(location_colors.head())
        title = sample_id2 + " Background Subtracted XY Map cell subtypes"
        # One trace per subtype present in this sample, so each gets a legend entry.
        for cellsubtype in df.loc[df['Sample_ID'] == sample_id, 'cell_subtype'].unique():
            color = str(cell_subtype_color_dict[cellsubtype])
            subtype_fig.add_scatter(
                mode='markers',
                marker=dict(size=3, opacity=0.5, color=color),
                x=location_colors.loc[location_colors['cell_subtype'] == cellsubtype, 'Nuc_X'],
                y=location_colors.loc[location_colors['cell_subtype'] == cellsubtype, 'Nuc_Y_Inv'],
                name=cellsubtype
            )
        subtype_fig.update_layout(title=title, plot_bgcolor='white')
        subtype_fig.update_xaxes(title_text='Nuc_X', linecolor='black')
        subtype_fig.update_yaxes(title_text='Nuc_Y_Inv', linecolor='black')
        # Adjust the size of the points
        for trace in subtype_fig.data:
            trace.marker.size = 2
        subtype_fig.update_layout(
            title=title,
            plot_bgcolor='white',
            legend=dict(
                title='Cell Subtypes',  # Legend title
                font=dict(
                    family='Arial',
                    size=12,
                    color='black'
                ),
                bgcolor='white',
                bordercolor='black',
                borderwidth=0.4,
                itemsizing='constant'
            )
        )
        # Save the figure as an image if needed
        #subtype_fig.write_image(output_images_dir + "/" + title.replace(" ", "_") + ".png", width=1200, height=800, scale=4)
        # print(sample_id, "processed!")
    return subtype_fig
def update_subtype_dropdown_options(event):
    """Checkbox callback: repopulate the X/Y axis dropdowns with the
    '_Intensity_Average' columns available for the selected samples
    (emptied again when no files are selected)."""
    # print(1)
    subtype_selected_files = subtype_checkbox_group.value
    # print(f"Selected files in update_dropdown_options: {subtype_selected_files}")
    if subtype_selected_files:
        subtype_keep = subtype_selected_files
        subtype_test2_df = df.loc[df['Sample_ID'].isin(subtype_keep), :].copy()
        subtype_selected_intensities = list(subtype_test2_df.columns)
        subtype_selected_intensities = [col for col in subtype_selected_intensities if '_Intensity_Average' in col]
        # print(f"Updated dropdown options: {subtype_selected_intensities}")
        subtype_x_axis_dropdown.options = subtype_selected_intensities
        subtype_y_axis_dropdown.options = subtype_selected_intensities
    else:
        subtype_x_axis_dropdown.options = []
        subtype_y_axis_dropdown.options = []
def update_subtype_slider_ranges(event):
    """Rescale the threshold sliders to [-|max|, +|max|] of the chosen axes
    and reset them to 0.

    NOTE(review): the symmetric range presumably assumes z-scored intensities
    (this step follows the ZSCORE notebook) — confirm; slider minima ignore
    the data's actual min.
    """
    subtype_selected_files = subtype_checkbox_group.value
    subtype_x_axis = subtype_x_axis_dropdown.value
    subtype_y_axis = subtype_y_axis_dropdown.value
    if subtype_selected_files and subtype_x_axis and subtype_y_axis:
        subtype_keep = subtype_selected_files
        subtype_test2_df = df.loc[df['Sample_ID'].isin(subtype_keep), :].copy()
        subtype_x_range = (subtype_test2_df[subtype_x_axis].min(), subtype_test2_df[subtype_x_axis].max())
        subtype_y_range = (subtype_test2_df[subtype_y_axis].min(), subtype_test2_df[subtype_y_axis].max())
        subtype_x_line_slider.start = -abs(subtype_x_range[1])
        subtype_x_line_slider.end = abs(subtype_x_range[1])
        subtype_y_line_slider.start = -abs(subtype_y_range[1])
        subtype_y_line_slider.end = abs(subtype_y_range[1])
        subtype_x_line_slider.value = 0
        subtype_y_line_slider.value = 0
def on_subtype_value_change(event):
    """Shared widget callback: gather all current control values, rebuild
    both figures, and push them into the Plotly placeholders."""
    subtype_selected_files = subtype_checkbox_group.value
    subtype_x_axis = subtype_x_axis_dropdown.value
    subtype_y_axis = subtype_y_axis_dropdown.value
    subtype_n_samples = subtype_random_sample_input.value
    subtype_x_line_pos = subtype_x_line_slider.value
    subtype_y_line_pos = subtype_y_line_slider.value
    # print(f"Selected files: {subtype_selected_files}")
    # print(f"X-Axis: {subtype_x_axis}, Y-Axis: {subtype_y_axis}, Number of samples: {subtype_n_samples}, X Line: {subtype_x_line_pos}, Y Line: {subtype_y_line_pos}")
    subtype_plot = create_subtype_dot_plot(subtype_selected_files, subtype_x_axis, subtype_y_axis, subtype_n_samples, subtype_x_line_pos, subtype_y_line_pos)
    subtype_reconstruction_plot = create_subtype_reconstruction_plot(subtype_selected_files)
    subtype_plot_placeholder.object = subtype_plot
    subtype_reconstruction_placeholder.object = subtype_reconstruction_plot
# Link value changes to function
# (checkbox changes refresh both the dropdown options and the slider ranges).
subtype_checkbox_group.param.watch(update_subtype_dropdown_options, 'value')
subtype_checkbox_group.param.watch(update_subtype_slider_ranges, 'value')
subtype_x_axis_dropdown.param.watch(update_subtype_slider_ranges, 'value')
subtype_y_axis_dropdown.param.watch(update_subtype_slider_ranges, 'value')
subtype_x_axis_dropdown.param.watch(on_subtype_value_change, 'value')
subtype_y_axis_dropdown.param.watch(on_subtype_value_change, 'value')
subtype_random_sample_input.param.watch(on_subtype_value_change, 'value')
subtype_x_line_slider.param.watch(on_subtype_value_change, 'value')
subtype_y_line_slider.param.watch(on_subtype_value_change, 'value')
# Layout
# Mirrors the cell-type layout above: controls on top, two plots below.
plot_with_subtype_reconstruction = pn.Column(
    "## Select Files to Construct Dot Plot",
    subtype_checkbox_group,
    subtype_x_axis_dropdown,
    subtype_y_axis_dropdown,
    subtype_random_sample_input,
    pn.Row(subtype_x_line_slider, subtype_y_line_slider),
    pn.Row(
        pn.Column(
            "## Dot Plot",
            pn.Column(subtype_plot_placeholder)),
        pn.Column(
            "## Cell Subtype Digital Reconstruction Plot",
            subtype_reconstruction_placeholder),
    )
)
subtype_x_axis = subtype_x_axis_dropdown.value
subtype_y_axis = subtype_y_axis_dropdown.value
#print(subtype_x_axis ,subtype_y_axis)
# Normalize the values in df2.cell_subtype
df2['cell_subtype'] = df2['cell_subtype'].str.strip().str.lower()
# Normalize the keys in cell_subtype_color_dict
cell_subtype_color_dict = {k.strip().lower(): v for k, v in cell_subtype_color_dict.items()}
# Map the cell_subtype values to colors
cell_subtype_row_colors = df2.cell_subtype.map(cell_subtype_color_dict)
# Debugging: print the unique values and the resulting mapped colors
#print("Unique values in df2.cell_subtype:", df2.cell_subtype.unique())
#print("Keys in cell_subtype_color_dict:", cell_subtype_color_dict.keys())
#print(cell_subtype_row_colors[1:5])
data
cell_subtype_color_dict
# Remove the 'rgb' prefix
# NOTE(review): values at this point are 'rgb(r, g, b)' STRINGS, so v[3:]
# yields the string '(r, g, b)', not a tuple — confirm downstream consumers
# expect that.
cell_subtype_color_dict = {k: v[3:] for k, v in cell_subtype_color_dict.items()}
cell_subtype_color_dict
# Colors dictionaries
sample_row_colors =df.Sample_ID.map(sample_color_dict)
#print(sample_row_colors[1:5])
# NOTE(review): the dict keys were lowercased above but df.cell_subtype here
# is NOT lowercased — mismatched-case subtypes will map to NaN; verify.
cell_subtype_row_colors = df.cell_subtype.map(cell_subtype_color_dict)
#print(cell_subtype_row_colors[1:5])
# Count of each immune_checkpoint type by cell_subtype
counts = df.groupby(['cell_type', 'cell_subtype']).size().reset_index(name='count')
counts
# Percentages are relative to the GRAND total, not the per-subtype total.
total = sum(counts['count'])
counts['percentage'] = counts.groupby('cell_subtype')['count'].transform(lambda x: (x / total) * 100)
#print(counts)
# ## IV.10. SAVE
# Write one CSV per sample, then one merged CSV for all samples.
# SIMPLIFICATION: pandas to_csv opens with mode='w' by default, so the old
# os.path.exists() branching wrote identically in both branches; the file is
# always (over)written.
for sample in ls_samples:
    #sample_id = sample.split('_')[0]
    sample_id = sample
    filename = os.path.join(output_data_dir, sample_id + "_" + step_suffix + ".csv")
    # Rows belonging to this sample only; index saved under the 'ID' label.
    df_save = df.loc[df['Sample_ID'] == sample_id, :]
    df_save.to_csv(filename, index=True, index_label='ID', mode='w')
# All samples
filename = os.path.join(output_data_dir, "all_Samples_" + project_name + ".csv")
# Save the DataFrame to a CSV file
df.to_csv(filename, index=True, index_label='ID')
#print("Merged file " + filename + " created!")
# ## Panel App
# Create widgets and panes
df_widget = pn.widgets.DataFrame(metadata, name="MetaData")
# Define the three tabs content
# NOTE(review): 'intial_df' (sic) and 'threshold_panel' are defined earlier
# in the file, outside this section.
metadata_tab = pn.Column(pn.pane.Markdown("## Initial DataFrame"),intial_df)
dotplot_tab = pn.Column(plot_with_reconstruction)
celltype_classification_tab = pn.Column(cell_type_classification_app_main, threshold_panel)
cellsubtype_classification_tab = pn.Column(cell_subtype_classification_app_main, subtype_threshold_panel)
subtype_dotplot_tab = pn.Column(plot_with_subtype_reconstruction,)
# Assemble the tabbed GoldenTemplate app for part IV.
app4_5 = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Marker Threshold & Classification",
    main=[
        pn.Tabs(
            ("Metadata", metadata_tab),
            ("Classify-Celltype-Marker",celltype_classification_tab),
            ("Cell_Types", dotplot_tab),
            ("Classify-Cell Subtype-Marker",cellsubtype_classification_tab),
            ("Cell-Subtypes", subtype_dotplot_tab),
    #        ("Heatmap",pn.Column(celltype_heatmap, cell_subtype_heatmap))
        )
    ]
)
# Blocking call: launches the server and opens the app in a browser.
app4_5.show()