import pandas as pd
import numpy as np
import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from skimage import filters, morphology, measure, color
from skimage.measure import regionprops
from tifffile import imread, imsave
import glob
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from skimage import measure
from scipy.spatial.distance import pdist
# Global matplotlib configuration for every figure produced below.
# Request TrueType font embedding in PDF output.
plt.rcParams['pdf.fonttype'] = 'truetype'
plt.rcParams['font.size'] = 20
# NOTE(review): the style sheet is applied after the rcParams overrides above;
# any key it defines would take precedence over them — confirm font.size survives.
plt.style.use('seaborn-v0_8-ticks')
# Condition -> hex colour. Agonists (*_AG) are opaque '#RRGGBB'; inhibitors
# (*_INH) are written as '#RRGGBBAA' with alpha byte 0x50.
# NOTE(review): ANDR_INH is the only inhibitor whose RGB part ('C49A00') differs
# from its agonist ('fccb17') — confirm this is intentional.
color_palette = {'DMSO' : '#4d4d4d',
'ARYL_AG':'#F8766D', 'ARYL_INH':"#F8766D50",
'ANDR_AG':'#fccb17', 'ANDR_INH':'#C49A0050',
"ESTR_AG": '#53B400', "ESTR_INH": '#53B40050',
'GC_AG' : '#00C094', 'GC_INH' : '#00C09450',
'LX_AG' : '#00B6EB', 'LX_INH' : '#00B6EB50',
'RET_AG' : '#A58AFF', 'RET_INH' : '#A58AFF50',
'THYR_AG' : '#FB61D7', 'THYR_INH' : '#FB61D750'
}
import matplotlib.colors as mcolors
def hex_to_rgba(hex_color):
    """Convert a hex colour string to an ``(r, g, b, a)`` tuple of floats.

    Parameters
    ----------
    hex_color : str
        ``'RRGGBB'`` or ``'RRGGBBAA'``, with or without leading ``'#'``.

    Returns
    -------
    tuple
        Four floats; alpha is 1.0 when the input has no alpha byte.

    Raises
    ------
    ValueError
        If the string is not 6 or 8 hexadecimal digits.
    """
    digits = hex_color.lstrip('#')
    # Validate here so malformed input fails with one consistent message
    # instead of whatever the colour parser happens to raise.
    if len(digits) not in (6, 8) or not set(digits) <= set('0123456789abcdefABCDEF'):
        raise ValueError(f"Invalid hex color: #{digits}")
    # matplotlib parses both '#rrggbb' and '#rrggbbaa' (each byte / 255),
    # so no manual alpha handling is needed.
    return mcolors.to_rgba('#' + digits)
def hex_to_rgba_string(hex_color):
    """Convert a hex colour to an ``'rgba(r, g, b, a)'`` string.

    Parameters
    ----------
    hex_color : str
        ``'RRGGBB'`` or ``'RRGGBBAA'``, with or without leading ``'#'``.

    Returns
    -------
    str
        ``'rgba(R, G, B, 1.0)'`` for 6-digit input, or
        ``'rgba(R, G, B, A)'`` with A rounded to three decimals for 8-digit
        input. R, G and B are integers in 0-255.

    Raises
    ------
    ValueError
        If the string is not 6 or 8 hexadecimal digits.

    Notes
    -----
    NOTE(review): the original docstring said this is "for Seaborn"; confirm
    the consumer actually parses rgba() strings.
    """
    digits = hex_color.lstrip('#')
    # int(..., 16) alone would silently accept things like '+f' or 'f_f';
    # check the alphabet explicitly so only real hex digits pass.
    if len(digits) not in (6, 8) or not set(digits) <= set('0123456789abcdefABCDEF'):
        raise ValueError(f"Invalid hex color: #{digits}")
    r, g, b = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    if len(digits) == 6:
        return f'rgba({r}, {g}, {b}, 1.0)'
    a = int(digits[6:], 16) / 255
    return f'rgba({r}, {g}, {b}, {a:.3f})'
# Convert all to RGBA
# Same keys as color_palette; values are whatever hex_to_rgba returns.
color_palette_rgba = {k: hex_to_rgba(v) for k, v in color_palette.items()}
# Same keys as color_palette; values are the 'rgba(...)' strings from hex_to_rgba_string.
color_palette_rgba_str = {k: hex_to_rgba_string(v) for k, v in color_palette.items()}
# Colour per '<Condition>_<Line>' key. CTL04E uses the saturated condition
# colour, CTL08A a lighter tint; inhibitors carry a trailing '50' alpha byte.
# NOTE(review): the ANDR_INH entries use a different RGB base than ANDR_AG,
# unlike every other receptor — confirm this is intentional.
color_palette_expanded = {
    'DMSO_CTL08A': '#cacaca',
    'DMSO_CTL04E': '#4d4d4d',
    'ARYL_AG_CTL08A': '#fdd6d3',
    'ARYL_AG_CTL04E': '#F8766D',
    # Was '#fdd6d3' (no alpha), i.e. identical to ARYL_AG_CTL08A; aligned with
    # the '50' alpha suffix used by every other *_INH_CTL08A entry.
    'ARYL_INH_CTL08A': '#fdd6d350',
    'ARYL_INH_CTL04E': '#F8766D50',
    'ANDR_AG_CTL08A': '#feefb9',
    'ANDR_AG_CTL04E': '#fccb17',
    'ANDR_INH_CTL08A': '#ede1b250',
    'ANDR_INH_CTL04E': '#C49A0050',
    'ESTR_AG_CTL08A': '#cbe8b2',
    'ESTR_AG_CTL04E': '#53B400',
    'ESTR_INH_CTL08A': '#cbe8b250',
    'ESTR_INH_CTL04E': '#53B40050',
    'GC_AG_CTL08A': '#b2ecdf',
    'GC_AG_CTL04E': '#00C094',
    'GC_INH_CTL08A': '#b2ecdf50',
    'GC_INH_CTL04E': '#00C09450',
    'LX_AG_CTL08A': '#b2e9f9',
    'LX_AG_CTL04E': '#00B6EB',
    'LX_INH_CTL08A': '#b2e9f950',
    'LX_INH_CTL04E': '#00B6EB50',
    'RET_AG_CTL08A': '#e4dcff',
    'RET_AG_CTL04E': '#A58AFF',
    'RET_INH_CTL08A': '#e4dcff50',
    'RET_INH_CTL04E': '#A58AFF50',
    'THYR_AG_CTL08A': '#fed0f3',
    'THYR_AG_CTL04E': '#FB61D7',
    'THYR_INH_CTL08A': '#fed0f350',
    'THYR_INH_CTL04E': '#FB61D750',
}
# Colour per cell line.
line_palette = {
    'CTL08A': "#99cfd670",  # pastel light blue
    'CTL04E': "#f8c88a70",  # pastel orange
}
# Render a stand-alone, two-row legend for color_palette and save it as a PDF.
labels = list(color_palette.keys())
# One thick proxy line per condition; no data is plotted.
handles = [plt.Line2D([0], [0], color=color_palette[label], lw=8) for label in labels]
fig, ax = plt.subplots(figsize=(8, 2))
legend = ax.legend(
    handles,
    labels,
    loc='center',
    ncol=int(np.ceil(len(labels) / 2)),
    frameon=False,
    bbox_to_anchor=(0.5, 0.5),
)
ax.axis('off')
# plt.tight_layout() was dropped: on this figure it only produced the
# "Tight layout not applied" UserWarning, and bbox_inches='tight' below
# already crops the saved file to the legend.
fig.savefig('../../endpoints_figures/color_palette_legend.pdf', dpi=300, bbox_inches='tight')
/tmp/ipykernel_3750874/414211028.py:10: UserWarning: Tight layout not applied. The left and right margins cannot be made large enough to accommodate all axes decorations. plt.tight_layout()
# File stems (basename up to the first '.') of the scan-region TIFFs whose
# names contain '0136' / '0145'.
# NOTE(review): presumably these are the blank images for mix 1 and mix 2 — confirm.
blank_mix1 = [
    tif_path.rsplit('/', 1)[-1].split('.')[0]
    for tif_path in glob.glob('/group/testa/Project/EndPoints/TPSSU/analysis/single_tif_scanregion/*0136*.tif')
]
blank_mix2 = [
    tif_path.rsplit('/', 1)[-1].split('.')[0]
    for tif_path in glob.glob('/group/testa/Project/EndPoints/TPSSU/analysis/single_tif_scanregion/*0145*.tif')
]
# Load the manual threshold-QC tables for both channels.
check_qc_ch1 = pd.read_csv('../../preprocessing/tables/Treshold_QC_check - Channel1_no_dup.csv')
check_qc_ch2 = pd.read_csv('../../preprocessing/tables/Treshold_QC_check - Channel2_no_dup.csv')
# Upper-case the free-text channel-1 verdicts, then collapse every uncertain
# wording into 'LIMIT' and every YES variant into 'YES'.
check_qc_ch1['general QC (keep or not)'] = (
    check_qc_ch1['general QC (keep or not)']
    .str.upper()
    .replace({
        'BETTER NOT': 'LIMIT',
        'MAYBE': 'LIMIT',
        'YES?': 'LIMIT',
        'SUS': 'LIMIT',
        'NOT SURE': 'LIMIT',
        'YES ': 'YES',
        'YES - 2 CBO': 'YES',
    })
)
# Inspect which verdict categories remain.
check_qc_ch1['general QC (keep or not)'].value_counts().index
Index(['YES', 'LIMIT', 'NO'], dtype='object', name='general QC (keep or not)')
# Drop the '.pdf' extension from the report names. regex=False keeps '.pdf' a
# literal on every pandas version (before 2.0 the default treated the pattern
# as a regex, where '.' matches any character).
check_qc_ch1['Reference Image Report'] = check_qc_ch1['Reference Image Report'].str.replace('.pdf', '', regex=False)
check_qc_ch1['Reference Image Report'].value_counts()
Reference Image Report 20240424_manuel_lessi_0148_ScanRegion1_rep_1_channel_1 1 20240424_manuel_lessi_0145_ScanRegion2_rep_2_channel_1 1 20240424_manuel_lessi_0145_ScanRegion3_rep_0_channel_1 1 20240424_manuel_lessi_0145_ScanRegion3_rep_1_channel_1 1 20240424_manuel_lessi_0145_ScanRegion3_rep_2_channel_1 1 .. 20240424_manuel_lessi_0136_ScanRegion2_rep_1_channel_1 1 20240424_manuel_lessi_0136_ScanRegion2_rep_2_channel_1 1 20240424_manuel_lessi_0136_ScanRegion3_rep_0_channel_1 1 20240424_manuel_lessi_0136_ScanRegion3_rep_1_channel_1 1 20240424_manuel_lessi_0136_ScanRegion3_rep_2_channel_1 1 Name: count, Length: 153, dtype: int64
# Upper-case the free-text channel-2 verdicts and fold the uncertain ones
# ('NOT SURE', '?') into 'LIMIT'; 'YES ' with a trailing space becomes 'YES'.
check_qc_ch2['general QC (keep or not)'] = (
    check_qc_ch2['general QC (keep or not)']
    .str.upper()
    .replace({'NOT SURE': 'LIMIT', '?': 'LIMIT', 'YES ': 'YES'})
)
# Inspect which verdict categories remain.
check_qc_ch2['general QC (keep or not)'].value_counts().index
Index(['YES', 'NO', 'LIMIT'], dtype='object', name='general QC (keep or not)')
# Drop the '.pdf' extension from the channel-2 report names. regex=False keeps
# '.pdf' a literal on every pandas version.
check_qc_ch2['Reference Image Report'] = check_qc_ch2['Reference Image Report'].str.replace('.pdf', '', regex=False)
# Inspect the channel-2 names just cleaned. This line previously displayed
# check_qc_ch1 (copy-paste slip), so any recorded output below still shows the
# channel-1 table until the cell is re-run.
check_qc_ch2['Reference Image Report'].value_counts()
Reference Image Report 20240424_manuel_lessi_0148_ScanRegion1_rep_1_channel_1 1 20240424_manuel_lessi_0145_ScanRegion2_rep_2_channel_1 1 20240424_manuel_lessi_0145_ScanRegion3_rep_0_channel_1 1 20240424_manuel_lessi_0145_ScanRegion3_rep_1_channel_1 1 20240424_manuel_lessi_0145_ScanRegion3_rep_2_channel_1 1 .. 20240424_manuel_lessi_0136_ScanRegion2_rep_1_channel_1 1 20240424_manuel_lessi_0136_ScanRegion2_rep_2_channel_1 1 20240424_manuel_lessi_0136_ScanRegion3_rep_0_channel_1 1 20240424_manuel_lessi_0136_ScanRegion3_rep_1_channel_1 1 20240424_manuel_lessi_0136_ScanRegion3_rep_2_channel_1 1 Name: count, Length: 153, dtype: int64
# Replicate key = first seven '_'-separated tokens of the report name
# (e.g. '20240424_manuel_lessi_0148_ScanRegion1_rep_1'), i.e. without the channel suffix.
check_qc_ch1['ImageName_ScanRegion_Rep'] = check_qc_ch1['Reference Image Report'].map(
    lambda report: '_'.join(report.split('_')[:7])
)
check_qc_ch2['ImageName_ScanRegion_Rep'] = check_qc_ch2['Reference Image Report'].map(
    lambda report: '_'.join(report.split('_')[:7])
)
# Rows whose verdict is YES or LIMIT, per channel.
to_keep_ch1 = check_qc_ch1[check_qc_ch1['general QC (keep or not)'].isin(['YES', 'LIMIT'])]
to_keep_ch2 = check_qc_ch2[check_qc_ch2['general QC (keep or not)'].isin(['YES', 'LIMIT'])]
# Number of replicates kept in both channels.
len(set(to_keep_ch1['ImageName_ScanRegion_Rep']) & set(to_keep_ch2['ImageName_ScanRegion_Rep']))
100
# Number of rows (QC'd reports) in the channel-1 table.
len(check_qc_ch1['ImageName_ScanRegion_Rep'])
153
# Number of rows (QC'd reports) in the channel-2 table.
len(check_qc_ch2['ImageName_ScanRegion_Rep'])
275
# Replicates rated YES or LIMIT in at least one channel (union of the per-channel
# keep tables built above). Order of the resulting list is arbitrary.
good_tissue_to_keep_limit = list(
    set(to_keep_ch1['ImageName_ScanRegion_Rep']) | set(to_keep_ch2['ImageName_ScanRegion_Rep'])
)
len(good_tissue_to_keep_limit)
245
# Replicates rated strictly YES in at least one channel. Order of the
# resulting list is arbitrary.
good_tissue_to_keep_yes = list(
    set(check_qc_ch1.loc[check_qc_ch1['general QC (keep or not)'] == 'YES', 'ImageName_ScanRegion_Rep'])
    | set(check_qc_ch2.loc[check_qc_ch2['general QC (keep or not)'] == 'YES', 'ImageName_ScanRegion_Rep'])
)
len(good_tissue_to_keep_yes)
211
# Sample sheet mapping each scanned region (image path + scan area) to its
# condition, cell line, paraffin block and antibody mix.
sample_sheet = pd.read_csv('../../preprocessing/tables/endpoints_ImageAnalysisSampleSheet.csv')
sample_sheet
Path | Scan Area | Condition | Line | OPA | Paraffin Block ID | MIX | |
---|---|---|---|---|---|---|---|
0 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | ESTR_AG | CTL04E | 12 | 29 | 1 |
1 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | ESTR_AG | CTL08A | 12 | 30 | 1 |
2 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | ESTR_INH | CTL04E | 12 | 31 | 1 |
3 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | ANDR_AG | CTL04E | 12 | 33 | 1 |
4 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | ESTR_INH | CTL08A | 12 | 32 | 1 |
5 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | ANDR_AG | CTL08A | 12 | 34 | 1 |
6 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | ANDR_ANT | CTL04E | 13 | 35 | 1 |
7 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | ARYL_INH | CTL08A | 13 | 40 | 1 |
8 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | ARYL_INH | CTL04E | 13 | 39 | 1 |
9 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | ARYL_AG | CTL08A | 13 | 38 | 1 |
10 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | ANDR_ANT | CTL08A | 13 | 36 | 1 |
11 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | ARYL_AG | CTL04E | 13 | 37 | 1 |
12 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | LX_AG | CTL04E | 14 | 41 | 1 |
13 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | GC_AG | CTL08A | 14 | 46 | 1 |
14 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | LX_AG | CTL08A | 14 | 42 | 1 |
15 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | LX_INH | CTL04E | 14 | 43 | 1 |
16 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | LX_INH | CTL08A | 14 | 44 | 1 |
17 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | GC_INH | CTL04E | 15 | 47 | 1 |
18 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | THYR_INH | CTL04E | 15 | 51 | 1 |
19 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | THYR_AG | CTL08A | 15 | 50 | 1 |
20 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | GC_INH | CTL08A | 15 | 48 | 1 |
21 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | THYR_AG | CTL04E | 15 | 49 | 1 |
22 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | THYR_INH | CTL08A | 15 | 52 | 1 |
23 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | DMSO | CTL04E | 16 | 57 | 1 |
24 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | DMSO | CTL08A | 16 | 58 | 1 |
25 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | RET_AG | CTL08A | 17 | 64 | 1 |
26 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | RET_INH | CTL04E | 18 | 65 | 1 |
27 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | RET_INH | CTL08A | 18 | 66 | 1 |
28 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | ESTR_AG | CTL04E | 12 | 29 | 2 |
29 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | ANDR_AG | CTL08A | 12 | 34 | 2 |
30 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | ESTR_AG | CTL08A | 12 | 30 | 2 |
31 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | ESTR_INH | CTL04E | 12 | 31 | 2 |
32 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | ANDR_AG | CTL04E | 12 | 33 | 2 |
33 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | ESTR_INH | CTL08A | 12 | 32 | 2 |
34 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | ANDR_INH | CTL08A | 13 | 36 | 2 |
35 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | ANDR_INH | CTL04E | 13 | 35 | 2 |
36 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | ARYL_INH | CTL04E | 13 | 39 | 2 |
37 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | ARYL_AG | CTL08A | 13 | 38 | 2 |
38 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | ARYL_INH | CTL08A | 13 | 40 | 2 |
39 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | ARYL_AG | CTL04E | 13 | 37 | 2 |
40 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | LX_AG | CTL08A | 14 | 42 | 2 |
41 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | LX_INH | CTL08A | 14 | 44 | 2 |
42 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | LX_AG | CTL04E | 14 | 41 | 2 |
43 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | LX_INH | CTL04E | 14 | 43 | 2 |
44 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | GC_AG | CTL08A | 14 | 46 | 2 |
45 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | GC_INH | CTL08A | 15 | 48 | 2 |
46 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | GC_INH | CTL04E | 15 | 47 | 2 |
47 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | THYR_INH | CTL04E | 15 | 51 | 2 |
48 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | THYR_AG | CTL04E | 15 | 49 | 2 |
49 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | THYR_INH | CTL08A | 15 | 52 | 2 |
50 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | THYR_AG | CTL08A | 15 | 50 | 2 |
51 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | DMSO | CTL08A | 16 | 58 | 2 |
52 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | DMSO | CTL04E | 16 | 57 | 2 |
53 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | RET_AG | CTL08A | 17 | 64 | 2 |
54 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | RET_INH | CTL08A | 18 | 66 | 2 |
55 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | RET_INH | CTL04E | 18 | 65 | 2 |
# Mix 1 labels paraffin blocks 35/36 as 'ANDR_ANT' while mix 2 labels the same
# blocks 'ANDR_INH'. Only 'ANDR_INH' exists in the colour palettes and in
# hormonal_conditions, so unify the label before deriving any keys from it.
sample_sheet['Condition'] = sample_sheet['Condition'].replace({'ANDR_ANT': 'ANDR_INH'})
# Image stem: last component of the Windows path, up to the first '.'.
sample_sheet['ImageName'] = sample_sheet['Path'].apply(lambda x: x.split('\\')[-1].split('.')[0])
sample_sheet['ImageName_ScanRegion'] = sample_sheet['ImageName'] + '_' + sample_sheet['Scan Area']
# The CSV header for the cell line really is 'Line ' (trailing space).
sample_sheet['Condition_Line'] = sample_sheet['Condition'] + '_' + sample_sheet['Line ']
sample_sheet
Path | Scan Area | Condition | Line | OPA | Paraffin Block ID | MIX | ImageName | ImageName_ScanRegion | Condition_Line | |
---|---|---|---|---|---|---|---|---|---|---|
0 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | ESTR_AG | CTL04E | 12 | 29 | 1 | 20240424_manuel_lessi_0128 | 20240424_manuel_lessi_0128_ScanRegion0 | ESTR_AG_CTL04E |
1 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | ESTR_AG | CTL08A | 12 | 30 | 1 | 20240424_manuel_lessi_0128 | 20240424_manuel_lessi_0128_ScanRegion1 | ESTR_AG_CTL08A |
2 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | ESTR_INH | CTL04E | 12 | 31 | 1 | 20240424_manuel_lessi_0128 | 20240424_manuel_lessi_0128_ScanRegion2 | ESTR_INH_CTL04E |
3 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | ANDR_AG | CTL04E | 12 | 33 | 1 | 20240424_manuel_lessi_0128 | 20240424_manuel_lessi_0128_ScanRegion3 | ANDR_AG_CTL04E |
4 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | ESTR_INH | CTL08A | 12 | 32 | 1 | 20240424_manuel_lessi_0128 | 20240424_manuel_lessi_0128_ScanRegion4 | ESTR_INH_CTL08A |
5 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | ANDR_AG | CTL08A | 12 | 34 | 1 | 20240424_manuel_lessi_0128 | 20240424_manuel_lessi_0128_ScanRegion5 | ANDR_AG_CTL08A |
6 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | ANDR_ANT | CTL04E | 13 | 35 | 1 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion0 | ANDR_ANT_CTL04E |
7 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | ARYL_INH | CTL08A | 13 | 40 | 1 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion1 | ARYL_INH_CTL08A |
8 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | ARYL_INH | CTL04E | 13 | 39 | 1 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion2 | ARYL_INH_CTL04E |
9 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | ARYL_AG | CTL08A | 13 | 38 | 1 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion3 | ARYL_AG_CTL08A |
10 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | ANDR_ANT | CTL08A | 13 | 36 | 1 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion4 | ANDR_ANT_CTL08A |
11 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | ARYL_AG | CTL04E | 13 | 37 | 1 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion5 | ARYL_AG_CTL04E |
12 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | LX_AG | CTL04E | 14 | 41 | 1 | 20240424_manuel_lessi_0130 | 20240424_manuel_lessi_0130_ScanRegion0 | LX_AG_CTL04E |
13 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | GC_AG | CTL08A | 14 | 46 | 1 | 20240424_manuel_lessi_0130 | 20240424_manuel_lessi_0130_ScanRegion1 | GC_AG_CTL08A |
14 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | LX_AG | CTL08A | 14 | 42 | 1 | 20240424_manuel_lessi_0130 | 20240424_manuel_lessi_0130_ScanRegion2 | LX_AG_CTL08A |
15 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | LX_INH | CTL04E | 14 | 43 | 1 | 20240424_manuel_lessi_0130 | 20240424_manuel_lessi_0130_ScanRegion3 | LX_INH_CTL04E |
16 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | LX_INH | CTL08A | 14 | 44 | 1 | 20240424_manuel_lessi_0130 | 20240424_manuel_lessi_0130_ScanRegion4 | LX_INH_CTL08A |
17 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | GC_INH | CTL04E | 15 | 47 | 1 | 20240424_manuel_lessi_0131 | 20240424_manuel_lessi_0131_ScanRegion0 | GC_INH_CTL04E |
18 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | THYR_INH | CTL04E | 15 | 51 | 1 | 20240424_manuel_lessi_0131 | 20240424_manuel_lessi_0131_ScanRegion1 | THYR_INH_CTL04E |
19 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | THYR_AG | CTL08A | 15 | 50 | 1 | 20240424_manuel_lessi_0131 | 20240424_manuel_lessi_0131_ScanRegion2 | THYR_AG_CTL08A |
20 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | GC_INH | CTL08A | 15 | 48 | 1 | 20240424_manuel_lessi_0131 | 20240424_manuel_lessi_0131_ScanRegion3 | GC_INH_CTL08A |
21 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | THYR_AG | CTL04E | 15 | 49 | 1 | 20240424_manuel_lessi_0131 | 20240424_manuel_lessi_0131_ScanRegion4 | THYR_AG_CTL04E |
22 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | THYR_INH | CTL08A | 15 | 52 | 1 | 20240424_manuel_lessi_0131 | 20240424_manuel_lessi_0131_ScanRegion5 | THYR_INH_CTL08A |
23 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | DMSO | CTL04E | 16 | 57 | 1 | 20240424_manuel_lessi_0132 | 20240424_manuel_lessi_0132_ScanRegion4 | DMSO_CTL04E |
24 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | DMSO | CTL08A | 16 | 58 | 1 | 20240424_manuel_lessi_0132 | 20240424_manuel_lessi_0132_ScanRegion5 | DMSO_CTL08A |
25 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | RET_AG | CTL08A | 17 | 64 | 1 | 20240424_manuel_lessi_0133 | 20240424_manuel_lessi_0133_ScanRegion4 | RET_AG_CTL08A |
26 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | RET_INH | CTL04E | 18 | 65 | 1 | 20240424_manuel_lessi_0134 | 20240424_manuel_lessi_0134_ScanRegion0 | RET_INH_CTL04E |
27 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | RET_INH | CTL08A | 18 | 66 | 1 | 20240424_manuel_lessi_0134 | 20240424_manuel_lessi_0134_ScanRegion2 | RET_INH_CTL08A |
28 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | ESTR_AG | CTL04E | 12 | 29 | 2 | 20240424_manuel_lessi_0137 | 20240424_manuel_lessi_0137_ScanRegion0 | ESTR_AG_CTL04E |
29 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | ANDR_AG | CTL08A | 12 | 34 | 2 | 20240424_manuel_lessi_0137 | 20240424_manuel_lessi_0137_ScanRegion1 | ANDR_AG_CTL08A |
30 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | ESTR_AG | CTL08A | 12 | 30 | 2 | 20240424_manuel_lessi_0137 | 20240424_manuel_lessi_0137_ScanRegion2 | ESTR_AG_CTL08A |
31 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | ESTR_INH | CTL04E | 12 | 31 | 2 | 20240424_manuel_lessi_0137 | 20240424_manuel_lessi_0137_ScanRegion3 | ESTR_INH_CTL04E |
32 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | ANDR_AG | CTL04E | 12 | 33 | 2 | 20240424_manuel_lessi_0137 | 20240424_manuel_lessi_0137_ScanRegion4 | ANDR_AG_CTL04E |
33 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | ESTR_INH | CTL08A | 12 | 32 | 2 | 20240424_manuel_lessi_0137 | 20240424_manuel_lessi_0137_ScanRegion5 | ESTR_INH_CTL08A |
34 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | ANDR_INH | CTL08A | 13 | 36 | 2 | 20240424_manuel_lessi_0138 | 20240424_manuel_lessi_0138_ScanRegion0 | ANDR_INH_CTL08A |
35 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | ANDR_INH | CTL04E | 13 | 35 | 2 | 20240424_manuel_lessi_0138 | 20240424_manuel_lessi_0138_ScanRegion1 | ANDR_INH_CTL04E |
36 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | ARYL_INH | CTL04E | 13 | 39 | 2 | 20240424_manuel_lessi_0138 | 20240424_manuel_lessi_0138_ScanRegion2 | ARYL_INH_CTL04E |
37 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | ARYL_AG | CTL08A | 13 | 38 | 2 | 20240424_manuel_lessi_0138 | 20240424_manuel_lessi_0138_ScanRegion3 | ARYL_AG_CTL08A |
38 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | ARYL_INH | CTL08A | 13 | 40 | 2 | 20240424_manuel_lessi_0138 | 20240424_manuel_lessi_0138_ScanRegion4 | ARYL_INH_CTL08A |
39 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | ARYL_AG | CTL04E | 13 | 37 | 2 | 20240424_manuel_lessi_0138 | 20240424_manuel_lessi_0138_ScanRegion5 | ARYL_AG_CTL04E |
40 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | LX_AG | CTL08A | 14 | 42 | 2 | 20240424_manuel_lessi_0139 | 20240424_manuel_lessi_0139_ScanRegion0 | LX_AG_CTL08A |
41 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | LX_INH | CTL08A | 14 | 44 | 2 | 20240424_manuel_lessi_0139 | 20240424_manuel_lessi_0139_ScanRegion1 | LX_INH_CTL08A |
42 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | LX_AG | CTL04E | 14 | 41 | 2 | 20240424_manuel_lessi_0139 | 20240424_manuel_lessi_0139_ScanRegion2 | LX_AG_CTL04E |
43 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | LX_INH | CTL04E | 14 | 43 | 2 | 20240424_manuel_lessi_0139 | 20240424_manuel_lessi_0139_ScanRegion3 | LX_INH_CTL04E |
44 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | GC_AG | CTL08A | 14 | 46 | 2 | 20240424_manuel_lessi_0139 | 20240424_manuel_lessi_0139_ScanRegion4 | GC_AG_CTL08A |
45 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | GC_INH | CTL08A | 15 | 48 | 2 | 20240424_manuel_lessi_0140 | 20240424_manuel_lessi_0140_ScanRegion0 | GC_INH_CTL08A |
46 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | GC_INH | CTL04E | 15 | 47 | 2 | 20240424_manuel_lessi_0140 | 20240424_manuel_lessi_0140_ScanRegion1 | GC_INH_CTL04E |
47 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | THYR_INH | CTL04E | 15 | 51 | 2 | 20240424_manuel_lessi_0140 | 20240424_manuel_lessi_0140_ScanRegion2 | THYR_INH_CTL04E |
48 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | THYR_AG | CTL04E | 15 | 49 | 2 | 20240424_manuel_lessi_0140 | 20240424_manuel_lessi_0140_ScanRegion3 | THYR_AG_CTL04E |
49 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion4 | THYR_INH | CTL08A | 15 | 52 | 2 | 20240424_manuel_lessi_0140 | 20240424_manuel_lessi_0140_ScanRegion4 | THYR_INH_CTL08A |
50 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion5 | THYR_AG | CTL08A | 15 | 50 | 2 | 20240424_manuel_lessi_0140 | 20240424_manuel_lessi_0140_ScanRegion5 | THYR_AG_CTL08A |
51 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | DMSO | CTL08A | 16 | 58 | 2 | 20240424_manuel_lessi_0141 | 20240424_manuel_lessi_0141_ScanRegion2 | DMSO_CTL08A |
52 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion3 | DMSO | CTL04E | 16 | 57 | 2 | 20240424_manuel_lessi_0141 | 20240424_manuel_lessi_0141_ScanRegion3 | DMSO_CTL04E |
53 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion2 | RET_AG | CTL08A | 17 | 64 | 2 | 20240424_manuel_lessi_0142 | 20240424_manuel_lessi_0142_ScanRegion2 | RET_AG_CTL08A |
54 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion0 | RET_INH | CTL08A | 18 | 66 | 2 | 20240424_manuel_lessi_0143 | 20240424_manuel_lessi_0143_ScanRegion0 | RET_INH_CTL08A |
55 | T:\Project\EndPoints\TPSSU\Pictures\20240424_m... | ScanRegion1 | RET_INH | CTL04E | 18 | 65 | 2 | 20240424_manuel_lessi_0143 | 20240424_manuel_lessi_0143_ScanRegion1 | RET_INH_CTL04E |
# Lookup: '<ImageName>_<ScanRegion>' -> '<Condition>_<Line>'.
sample_dict = dict(zip(sample_sheet['ImageName_ScanRegion'], sample_sheet['Condition_Line']))
# Image names belonging to each antibody mix (one entry per sample-sheet row,
# so an image name appears once per scan region).
mix_1_names = sample_sheet.loc[sample_sheet['MIX'] == 1, 'ImageName'].tolist()
mix_2_names = sample_sheet.loc[sample_sheet['MIX'] == 2, 'ImageName'].tolist()
# Ordered condition list: the DMSO control first, then every other condition
# alphabetically.
hormonal_conditions = ['DMSO'] + sorted(
    condition
    for condition in [
        'ESTR_AG', 'ESTR_INH', 'ANDR_AG', 'ANDR_INH', 'ARYL_INH',
        'ARYL_AG', 'LX_AG', 'GC_AG', 'LX_INH', 'GC_INH', 'THYR_INH',
        'THYR_AG', 'DMSO', 'RET_AG',
        'RET_INH',
    ]
    if condition != 'DMSO'
)
# All per-image measurement CSVs, then split by antibody mix: a file belongs
# to a mix when its path contains any of that mix's image names.
measurements_files = glob.glob("/group/testa/Project/EndPoints/TPSSU/analysis/measurements/*.csv")
measurements_files_mix_1 = list(
    filter(lambda csv_path: any(name in csv_path for name in mix_1_names), measurements_files)
)
measurements_files_mix_2 = list(
    filter(lambda csv_path: any(name in csv_path for name in mix_2_names), measurements_files)
)
# Per-image mask measurements (area, label, intensity statistics); the first,
# unnamed column holds the image name.
mask_file = pd.read_csv("/group/testa/Project/EndPoints/TPSSU/analysis/cyto_mask_measures.csv")
mask_file
Unnamed: 0 | area | label | intensity_mean | intensity_min | intensity_max | |
---|---|---|---|---|---|---|
0 | 20240424_manuel_lessi_0133_ScanRegion4_rep_0_c... | 2415914.0 | 1.0 | 8478.139486 | 5323.0 | 45060.0 |
1 | 20240424_manuel_lessi_0143_ScanRegion0_rep_1_c... | 10287703.0 | 1.0 | 9564.326972 | 6720.0 | 42038.0 |
2 | 20240424_manuel_lessi_0133_ScanRegion2_rep_1_c... | 3015477.0 | 1.0 | 11510.699631 | 5536.0 | 49132.0 |
3 | 20240424_manuel_lessi_0128_ScanRegion4_rep_1_c... | 6843311.0 | 1.0 | 6301.567833 | 4239.0 | 34600.0 |
4 | 20240424_manuel_lessi_0141_ScanRegion0_rep_2_c... | 23305924.0 | 1.0 | 5729.083352 | 3988.0 | 24114.0 |
... | ... | ... | ... | ... | ... | ... |
278 | 20240424_manuel_lessi_0145_ScanRegion2_rep_1_c... | 35809244.0 | 1.0 | 367.353397 | 340.0 | 2983.0 |
279 | 20240424_manuel_lessi_0134_ScanRegion6_rep_1_c... | NaN | NaN | NaN | NaN | NaN |
280 | 20240424_manuel_lessi_0144_ScanRegion2_rep_1_c... | 6015146.0 | 1.0 | 7573.352427 | 4627.0 | 44177.0 |
281 | 20240424_manuel_lessi_0129_ScanRegion5_rep_1_c... | 23404593.0 | 1.0 | 7714.037972 | 4597.0 | 65534.0 |
282 | 20240424_manuel_lessi_0145_ScanRegion0_rep_1_c... | 10141827.0 | 1.0 | 388.299189 | 360.0 | 10460.0 |
283 rows × 6 columns
# Split each image name on '_' once, then rebuild three keys of increasing
# specificity from its leading tokens:
#   4 tokens -> image, 5 -> image + scan region, 7 -> image + scan region + replicate.
_mask_name_tokens = mask_file['Unnamed: 0'].map(lambda name: name.split('_'))
mask_file['OriginalImageName'] = _mask_name_tokens.map(lambda tokens: '_'.join(tokens[0:4]))
mask_file['ImageName_ScanRegion'] = _mask_name_tokens.map(lambda tokens: '_'.join(tokens[0:5]))
mask_file['ImageName_ScanRegion_Rep'] = _mask_name_tokens.map(lambda tokens: '_'.join(tokens[0:7]))
mask_file
Unnamed: 0 | area | label | intensity_mean | intensity_min | intensity_max | OriginalImageName | ImageName_ScanRegion | ImageName_ScanRegion_Rep | |
---|---|---|---|---|---|---|---|---|---|
0 | 20240424_manuel_lessi_0133_ScanRegion4_rep_0_c... | 2415914.0 | 1.0 | 8478.139486 | 5323.0 | 45060.0 | 20240424_manuel_lessi_0133 | 20240424_manuel_lessi_0133_ScanRegion4 | 20240424_manuel_lessi_0133_ScanRegion4_rep_0 |
1 | 20240424_manuel_lessi_0143_ScanRegion0_rep_1_c... | 10287703.0 | 1.0 | 9564.326972 | 6720.0 | 42038.0 | 20240424_manuel_lessi_0143 | 20240424_manuel_lessi_0143_ScanRegion0 | 20240424_manuel_lessi_0143_ScanRegion0_rep_1 |
2 | 20240424_manuel_lessi_0133_ScanRegion2_rep_1_c... | 3015477.0 | 1.0 | 11510.699631 | 5536.0 | 49132.0 | 20240424_manuel_lessi_0133 | 20240424_manuel_lessi_0133_ScanRegion2 | 20240424_manuel_lessi_0133_ScanRegion2_rep_1 |
3 | 20240424_manuel_lessi_0128_ScanRegion4_rep_1_c... | 6843311.0 | 1.0 | 6301.567833 | 4239.0 | 34600.0 | 20240424_manuel_lessi_0128 | 20240424_manuel_lessi_0128_ScanRegion4 | 20240424_manuel_lessi_0128_ScanRegion4_rep_1 |
4 | 20240424_manuel_lessi_0141_ScanRegion0_rep_2_c... | 23305924.0 | 1.0 | 5729.083352 | 3988.0 | 24114.0 | 20240424_manuel_lessi_0141 | 20240424_manuel_lessi_0141_ScanRegion0 | 20240424_manuel_lessi_0141_ScanRegion0_rep_2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
278 | 20240424_manuel_lessi_0145_ScanRegion2_rep_1_c... | 35809244.0 | 1.0 | 367.353397 | 340.0 | 2983.0 | 20240424_manuel_lessi_0145 | 20240424_manuel_lessi_0145_ScanRegion2 | 20240424_manuel_lessi_0145_ScanRegion2_rep_1 |
279 | 20240424_manuel_lessi_0134_ScanRegion6_rep_1_c... | NaN | NaN | NaN | NaN | NaN | 20240424_manuel_lessi_0134 | 20240424_manuel_lessi_0134_ScanRegion6 | 20240424_manuel_lessi_0134_ScanRegion6_rep_1 |
280 | 20240424_manuel_lessi_0144_ScanRegion2_rep_1_c... | 6015146.0 | 1.0 | 7573.352427 | 4627.0 | 44177.0 | 20240424_manuel_lessi_0144 | 20240424_manuel_lessi_0144_ScanRegion2 | 20240424_manuel_lessi_0144_ScanRegion2_rep_1 |
281 | 20240424_manuel_lessi_0129_ScanRegion5_rep_1_c... | 23404593.0 | 1.0 | 7714.037972 | 4597.0 | 65534.0 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion5 | 20240424_manuel_lessi_0129_ScanRegion5_rep_1 |
282 | 20240424_manuel_lessi_0145_ScanRegion0_rep_1_c... | 10141827.0 | 1.0 | 388.299189 | 360.0 | 10460.0 | 20240424_manuel_lessi_0145 | 20240424_manuel_lessi_0145_ScanRegion0 | 20240424_manuel_lessi_0145_ScanRegion0_rep_1 |
283 rows × 9 columns
# Mask measurements for images listed under MIX == 1 in the sample sheet.
_in_mix_1 = mask_file['OriginalImageName'].isin(mix_1_names)
mask_file_df_mix_1 = mask_file[_in_mix_1]
mask_file_df_mix_1
Unnamed: 0 | area | label | intensity_mean | intensity_min | intensity_max | OriginalImageName | ImageName_ScanRegion | ImageName_ScanRegion_Rep | |
---|---|---|---|---|---|---|---|---|---|
0 | 20240424_manuel_lessi_0133_ScanRegion4_rep_0_c... | 2415914.0 | 1.0 | 8478.139486 | 5323.0 | 45060.0 | 20240424_manuel_lessi_0133 | 20240424_manuel_lessi_0133_ScanRegion4 | 20240424_manuel_lessi_0133_ScanRegion4_rep_0 |
2 | 20240424_manuel_lessi_0133_ScanRegion2_rep_1_c... | 3015477.0 | 1.0 | 11510.699631 | 5536.0 | 49132.0 | 20240424_manuel_lessi_0133 | 20240424_manuel_lessi_0133_ScanRegion2 | 20240424_manuel_lessi_0133_ScanRegion2_rep_1 |
3 | 20240424_manuel_lessi_0128_ScanRegion4_rep_1_c... | 6843311.0 | 1.0 | 6301.567833 | 4239.0 | 34600.0 | 20240424_manuel_lessi_0128 | 20240424_manuel_lessi_0128_ScanRegion4 | 20240424_manuel_lessi_0128_ScanRegion4_rep_1 |
9 | 20240424_manuel_lessi_0134_ScanRegion6_rep_0_c... | 7147296.0 | 1.0 | 5035.728892 | 2796.0 | 20250.0 | 20240424_manuel_lessi_0134 | 20240424_manuel_lessi_0134_ScanRegion6 | 20240424_manuel_lessi_0134_ScanRegion6_rep_0 |
12 | 20240424_manuel_lessi_0130_ScanRegion2_rep_1_c... | 5736151.0 | 1.0 | 3811.331029 | 2366.0 | 59754.0 | 20240424_manuel_lessi_0130 | 20240424_manuel_lessi_0130_ScanRegion2 | 20240424_manuel_lessi_0130_ScanRegion2_rep_1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
272 | 20240424_manuel_lessi_0131_ScanRegion2_rep_1_c... | 619563.0 | 1.0 | 7816.673183 | 4931.0 | 56017.0 | 20240424_manuel_lessi_0131 | 20240424_manuel_lessi_0131_ScanRegion2 | 20240424_manuel_lessi_0131_ScanRegion2_rep_1 |
274 | 20240424_manuel_lessi_0129_ScanRegion5_rep_2_c... | 15482008.0 | 1.0 | 5670.165540 | 3481.0 | 56907.0 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion5 | 20240424_manuel_lessi_0129_ScanRegion5_rep_2 |
277 | 20240424_manuel_lessi_0133_ScanRegion0_rep_2_c... | 10075425.0 | 1.0 | 4029.633461 | 2629.0 | 41430.0 | 20240424_manuel_lessi_0133 | 20240424_manuel_lessi_0133_ScanRegion0 | 20240424_manuel_lessi_0133_ScanRegion0_rep_2 |
279 | 20240424_manuel_lessi_0134_ScanRegion6_rep_1_c... | NaN | NaN | NaN | NaN | NaN | 20240424_manuel_lessi_0134 | 20240424_manuel_lessi_0134_ScanRegion6 | 20240424_manuel_lessi_0134_ScanRegion6_rep_1 |
281 | 20240424_manuel_lessi_0129_ScanRegion5_rep_1_c... | 23404593.0 | 1.0 | 7714.037972 | 4597.0 | 65534.0 | 20240424_manuel_lessi_0129 | 20240424_manuel_lessi_0129_ScanRegion5 | 20240424_manuel_lessi_0129_ScanRegion5_rep_1 |
116 rows × 9 columns
# Mask measurements for images listed under MIX == 2 in the sample sheet.
_in_mix_2 = mask_file['OriginalImageName'].isin(mix_2_names)
mask_file_df_mix_2 = mask_file[_in_mix_2]
mask_file_df_mix_2
Unnamed: 0 | area | label | intensity_mean | intensity_min | intensity_max | OriginalImageName | ImageName_ScanRegion | ImageName_ScanRegion_Rep | |
---|---|---|---|---|---|---|---|---|---|
1 | 20240424_manuel_lessi_0143_ScanRegion0_rep_1_c... | 10287703.0 | 1.0 | 9564.326972 | 6720.0 | 42038.0 | 20240424_manuel_lessi_0143 | 20240424_manuel_lessi_0143_ScanRegion0 | 20240424_manuel_lessi_0143_ScanRegion0_rep_1 |
4 | 20240424_manuel_lessi_0141_ScanRegion0_rep_2_c... | 23305924.0 | 1.0 | 5729.083352 | 3988.0 | 24114.0 | 20240424_manuel_lessi_0141 | 20240424_manuel_lessi_0141_ScanRegion0 | 20240424_manuel_lessi_0141_ScanRegion0_rep_2 |
5 | 20240424_manuel_lessi_0138_ScanRegion3_rep_1_c... | 6139706.0 | 1.0 | 10728.699008 | 7679.0 | 47373.0 | 20240424_manuel_lessi_0138 | 20240424_manuel_lessi_0138_ScanRegion3 | 20240424_manuel_lessi_0138_ScanRegion3_rep_1 |
6 | 20240424_manuel_lessi_0140_ScanRegion2_rep_0_c... | 2887437.0 | 1.0 | 10582.573429 | 8073.0 | 32277.0 | 20240424_manuel_lessi_0140 | 20240424_manuel_lessi_0140_ScanRegion2 | 20240424_manuel_lessi_0140_ScanRegion2_rep_0 |
10 | 20240424_manuel_lessi_0142_ScanRegion3_rep_0_c... | 18501332.0 | 1.0 | 8751.319145 | 6006.0 | 40819.0 | 20240424_manuel_lessi_0142 | 20240424_manuel_lessi_0142_ScanRegion3 | 20240424_manuel_lessi_0142_ScanRegion3_rep_0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
266 | 20240424_manuel_lessi_0139_ScanRegion0_rep_2_c... | 16192847.0 | 1.0 | 12416.619954 | 8449.0 | 52637.0 | 20240424_manuel_lessi_0139 | 20240424_manuel_lessi_0139_ScanRegion0 | 20240424_manuel_lessi_0139_ScanRegion0_rep_2 |
268 | 20240424_manuel_lessi_0140_ScanRegion4_rep_1_c... | 10125138.0 | 1.0 | 6630.255008 | 3940.0 | 59013.0 | 20240424_manuel_lessi_0140 | 20240424_manuel_lessi_0140_ScanRegion4 | 20240424_manuel_lessi_0140_ScanRegion4_rep_1 |
271 | 20240424_manuel_lessi_0137_ScanRegion4_rep_1_c... | 10843902.0 | 1.0 | 6203.065238 | 4195.0 | 41839.0 | 20240424_manuel_lessi_0137 | 20240424_manuel_lessi_0137_ScanRegion4 | 20240424_manuel_lessi_0137_ScanRegion4_rep_1 |
273 | 20240424_manuel_lessi_0142_ScanRegion2_rep_1_c... | 2596930.0 | 1.0 | 4422.110883 | 3233.0 | 18355.0 | 20240424_manuel_lessi_0142 | 20240424_manuel_lessi_0142_ScanRegion2 | 20240424_manuel_lessi_0142_ScanRegion2_rep_1 |
276 | 20240424_manuel_lessi_0141_ScanRegion4_rep_1_c... | 26594153.0 | 1.0 | 6924.010593 | 4440.0 | 41733.0 | 20240424_manuel_lessi_0141 | 20240424_manuel_lessi_0141_ScanRegion4 | 20240424_manuel_lessi_0141_ScanRegion4_rep_1 |
109 rows × 9 columns
def filter_nuclei(df, column='area', lower=0.01, upper=0.99):
    """Drop rows whose ``column`` value lies in the extreme quantile tails.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter; must contain ``column``.
    column : str, default 'area'
        Name of the numeric column the quantile cut-offs are computed on.
    lower, upper : float, default 0.01 and 0.99
        Quantile levels (between 0 and 1) used as exclusive lower/upper bounds.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` whose ``column`` value is strictly greater than
        the ``lower`` quantile and strictly smaller than the ``upper``
        quantile. The original index labels are kept and ``df`` itself is
        not modified.
    """
    values = df[column]
    # One quantile call yields both cut-offs, in the order requested.
    low_cut, high_cut = values.quantile([lower, upper]).to_numpy()
    # Strict inequalities: rows sitting exactly on a cut-off are discarded.
    return df[(values > low_cut) & (values < high_cut)]
# Load the per-object measurement tables for the two staining mixes.
tot_df_mix1_ch1 = pd.read_csv("/group/testa/Project/EndPoints/TPSSU/analysis/measurements_mix_1_ch1_CTIP2.csv")
tot_df_mix2_ch1 = pd.read_csv("/group/testa/Project/EndPoints/TPSSU/analysis/measurements_mix_2_ch1_NeuN.csv")

# Shape descriptors used for the PCA further down.
morphological_features = ['area', 'eccentricity', 'equivalent_diameter', 'extent',
                          'major_axis_length', 'minor_axis_length', 'orientation', 'perimeter',
                          'solidity']

# Replicate identifier = first seven '_'-separated tokens of the image name.
tot_df_mix1_ch1['ImageName_ScanRegion_Rep'] = tot_df_mix1_ch1['ImageName'].apply(lambda x: '_'.join(x.split('_')[0:7]))
print(tot_df_mix1_ch1.shape)

# Apply the area filter separately within every replicate. The groups are
# iterated explicitly instead of using `groupby(...).apply(filter_nuclei)`:
# that form relies on the deprecated behaviour of passing the grouping column
# to the applied function, and the grouping column is still needed below.
# Groups come out sorted by key and the index is renumbered, which matches
# the previous `.apply(...).reset_index(drop=True)` result.
tot_df_mix1_ch1 = pd.concat(
    [filter_nuclei(replicate_df)
     for _, replicate_df in tot_df_mix1_ch1.groupby('ImageName_ScanRegion_Rep')],
    ignore_index=True,
)
print(tot_df_mix1_ch1.shape)

# Row key "<label>_<replicate id>", used as the index.
tot_df_mix1_ch1['label_image'] = tot_df_mix1_ch1['label'].astype('str') + '_' + tot_df_mix1_ch1['ImageName_ScanRegion_Rep'].astype('str')
tot_df_mix1_ch1.set_index('label_image', inplace=True)
(4591259, 21)
/tmp/ipykernel_3750874/3325876105.py:11: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning. tot_df_mix1_ch1 = tot_df_mix1_ch1.groupby('ImageName_ScanRegion_Rep').apply(filter_nuclei).reset_index(drop=True)
(4496846, 21)
# Keep only the morphological feature columns of the mix-1 table.
tot_df_mix1_ch1 = tot_df_mix1_ch1[morphological_features]

# Same preparation for the mix-2 table.
# Replicate identifier = first seven '_'-separated tokens of the image name.
tot_df_mix2_ch1['ImageName_ScanRegion_Rep'] = tot_df_mix2_ch1['ImageName'].apply(lambda x: '_'.join(x.split('_')[0:7]))
print(tot_df_mix2_ch1.shape)

# Apply the area filter separately within every replicate. The groups are
# iterated explicitly instead of using `groupby(...).apply(filter_nuclei)`:
# that form relies on the deprecated behaviour of passing the grouping column
# to the applied function, and the grouping column is still needed below.
# Groups come out sorted by key and the index is renumbered, which matches
# the previous `.apply(...).reset_index(drop=True)` result.
tot_df_mix2_ch1 = pd.concat(
    [filter_nuclei(replicate_df)
     for _, replicate_df in tot_df_mix2_ch1.groupby('ImageName_ScanRegion_Rep')],
    ignore_index=True,
)
print(tot_df_mix2_ch1.shape)

# Row key "<label>_<replicate id>", used as the index.
tot_df_mix2_ch1['label_image'] = tot_df_mix2_ch1['label'].astype('str') + '_' + tot_df_mix2_ch1['ImageName_ScanRegion_Rep'].astype('str')
tot_df_mix2_ch1.set_index('label_image', inplace=True)
(4788573, 21)
/tmp/ipykernel_3750874/531573534.py:4: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning. tot_df_mix2_ch1 = tot_df_mix2_ch1.groupby('ImageName_ScanRegion_Rep').apply(filter_nuclei).reset_index(drop=True)
(4690248, 21)
# Keep only the morphological feature columns of the mix-2 table.
tot_df_mix2_ch1 = tot_df_mix2_ch1[morphological_features]

# Stack both mixes row-wise; the index still holds the `label_image` keys.
tot_df_morphological = pd.concat([tot_df_mix1_ch1, tot_df_mix2_ch1], axis=0)

# Rebuild the identifiers from the index key: token 0 is skipped, tokens 1-7
# give the replicate id and tokens 1-5 the scan-region id.
tot_df_morphological['ImageName_ScanRegion_Rep'] = [
    '_'.join(key.split('_')[1:8]) for key in tot_df_morphological.index
]
tot_df_morphological['ImageName_ScanRegion'] = [
    '_'.join(key.split('_')[1:6]) for key in tot_df_morphological.index
]

# Look up the "<condition>_<line>" annotation of every scan region;
# `sample_dict` is defined elsewhere in the file.
tot_df_morphological['Condition_Line'] = [
    sample_dict.get(region, 'Unknown')
    for region in tot_df_morphological['ImageName_ScanRegion']
]
# Condition = annotation with the line suffix removed; Line = last '_' token.
tot_df_morphological['Condition'] = [
    annotation.replace('_CTL04E', '').replace('_CTL08A', '')
    for annotation in tot_df_morphological['Condition_Line']
]
tot_df_morphological['Line'] = [
    annotation.split('_')[-1]
    for annotation in tot_df_morphological['Condition_Line']
]

# Successive row filters, applied in the same order as before.
in_hormonal_conditions = tot_df_morphological['Condition'].isin(hormonal_conditions)
tot_df_morphological = tot_df_morphological.loc[in_hormonal_conditions]

in_good_tissue = tot_df_morphological['ImageName_ScanRegion_Rep'].isin(good_tissue_to_keep_limit)
tot_df_morphological = tot_df_morphological.loc[in_good_tissue]

not_outlier = tot_df_morphological["ImageName_ScanRegion_Rep"] != "20240424_manuel_lessi_0134_ScanRegion7_rep_1"  # outlier
tot_df_morphological = tot_df_morphological.loc[not_outlier]

has_known_condition = tot_df_morphological['Condition'] != 'Unknown'  # filter out unknown conditions
tot_df_morphological = tot_df_morphological.loc[has_known_condition]

# Feature matrix for the PCA, then the number of distinct replicates left.
features = tot_df_morphological[morphological_features]
len(tot_df_morphological["ImageName_ScanRegion_Rep"].unique())
139
# Standardise the features with StandardScaler before the decomposition.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Full PCA (no component limit) on the standardised features.
pca = PCA()
principal_components = pca.fit_transform(features_scaled)

# Store every component score as a 'PC<k>' column next to the raw features.
pc_names = [f'PC{k}' for k in range(1, len(pca.components_) + 1)]
for column_idx, pc_name in enumerate(pc_names):
    tot_df_morphological[pc_name] = principal_components[:, column_idx]

# Loadings table: one row per component, one column per feature.
pca_components = pd.DataFrame(pca.components_, columns=features.columns, index=pc_names)

# Five largest absolute loadings of the first three components.
absolute_loadings = pca_components.abs()
important_features_pca1 = absolute_loadings.loc['PC1'].sort_values(ascending=False).iloc[:5]
important_features_pca2 = absolute_loadings.loc['PC2'].sort_values(ascending=False).iloc[:5]
important_features_pca3 = absolute_loadings.loc['PC3'].sort_values(ascending=False).iloc[:5]

print("Top 5 features contributing to PCA1:")
print(important_features_pca1)
print("\nTop 5 features contributing to PCA2:")
print(important_features_pca2)
print("\nTop 5 features contributing to PCA3:")
print(important_features_pca3)
Top 5 features contributing to PCA1: equivalent_diameter 0.451671 perimeter 0.446028 area 0.445225 major_axis_length 0.430577 minor_axis_length 0.424077 Name: PC1, dtype: float64 Top 5 features contributing to PCA2: extent 0.637839 solidity 0.531573 eccentricity 0.499679 major_axis_length 0.179739 minor_axis_length 0.132653 Name: PC2, dtype: float64 Top 5 features contributing to PCA3: orientation 0.999614 solidity 0.023722 eccentricity 0.010693 extent 0.009049 major_axis_length 0.002369 Name: PC3, dtype: float64
# Average every numeric column (features and PC scores) per replicate.
# `numeric_only=True` is passed explicitly: the first positional parameter of
# GroupBy.mean is the `numeric_only` flag, not a column selection, so the
# former `.mean(['PC1', 'PC2'])` only worked because a non-empty list is truthy.
grouped_df = (
    tot_df_morphological
    .groupby('ImageName_ScanRegion_Rep')
    .mean(numeric_only=True)
    .reset_index()
)

# Re-derive the annotations dropped by the numeric-only aggregation.
# The scan-region id is the first five '_'-separated tokens of the replicate id.
grouped_df['ImageName_ScanRegion'] = grouped_df['ImageName_ScanRegion_Rep'].apply(lambda x: '_'.join(x.split('_')[0:5]))
grouped_df['Condition_Line'] = grouped_df['ImageName_ScanRegion'].apply(lambda x: sample_dict.get(x, 'Unknown'))
grouped_df['Condition'] = grouped_df['Condition_Line'].apply(lambda x: x.replace('_CTL04E', '').replace('_CTL08A', ''))
grouped_df['Line'] = grouped_df['Condition_Line'].apply(lambda x: x.split('_')[-1])

# One colour per condition, listed in category order so that the integer
# category codes used for `c` index the matching palette entry.
condition_categories = grouped_df['Condition'].astype('category')
cmap_conditions = mcolors.ListedColormap(
    [color_palette[c] for c in condition_categories.cat.categories]
)

# Replicate-level scatter of PC1 against PC2, coloured by condition.
plt.scatter(
    grouped_df['PC1'], grouped_df['PC2'],
    #alpha=0.3,
    c=condition_categories.cat.codes,
    cmap=cmap_conditions
)
#plt.savefig('./endpoints_figures/endpoints_PCA.pdf', dpi = 300)
<matplotlib.collections.PathCollection at 0x1554cbfb7400>
# Replicate-level scatter of PC2 against PC3, coloured by condition through
# the integer category codes and the colormap built for the PC1/PC2 scatter.
condition_codes = grouped_df['Condition'].astype('category').cat.codes
plt.scatter(grouped_df['PC2'], grouped_df['PC3'], c=condition_codes, cmap=cmap_conditions)
#plt.savefig('./endpoints_figures/endpoints_PCA_PC2_PC3.pdf', dpi = 300)
<matplotlib.collections.PathCollection at 0x1554c5242f20>
# Box plots of the per-object PC1/PC2/PC3 scores, one figure per component
# with one panel per line (CTL08A left, CTL04E right) and one box per entry of
# `hormonal_conditions`. The three figures were identical copy-pasted stanzas;
# they are generated in a loop here.
for pc in ('PC1', 'PC2', 'PC3'):
    fig, ax = plt.subplots(1, 2, figsize=(20, 5))
    for panel, line in zip(ax, ('CTL08A', 'CTL04E')):
        # Restrict once to the rows/columns this panel needs.
        line_df = tot_df_morphological.loc[tot_df_morphological['Line'] == line, ['Condition', pc]]

        for i, cond in enumerate(hormonal_conditions):
            data = line_df.loc[line_df['Condition'] == cond, pc]
            box = panel.boxplot(data, positions=[i], patch_artist=True, widths=.8)
            for patch in box['boxes']:
                patch.set_facecolor(color_palette[cond])
            box['medians'][0].set_color('black')

        # Reference line at the DMSO median of this line. It does not depend
        # on the condition being drawn, so it is computed and drawn once.
        dmso_median = line_df.loc[line_df['Condition'] == 'DMSO', pc].median()
        panel.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

        panel.set_xticks(range(len(hormonal_conditions)))
        panel.set_xticklabels(hormonal_conditions, rotation=90)
        panel.set_title(line)
        panel.set_ylabel(pc)

    fig.savefig(f'../../endpoints_figures/endpoints_{pc}_all_nuclei.pdf', dpi=300, bbox_inches='tight')
    fig.savefig(f'../../endpoints_figures/endpoints_{pc}_all_nuclei.png', dpi=300, bbox_inches='tight')
# Box plots of the per-replicate mean PC1/PC2/PC3 scores (rows of
# `grouped_df`), one figure per component with one panel per line and one box
# per entry of `hormonal_conditions`; the individual replicate means are
# overlaid as points. The three figures were identical copy-pasted stanzas;
# they are generated in a loop here.
for pc in ('PC1', 'PC2', 'PC3'):
    fig, ax = plt.subplots(1, 2, figsize=(20, 5))
    for panel, line in zip(ax, ('CTL08A', 'CTL04E')):
        line_df = grouped_df[grouped_df['Line'] == line]

        for i, cond in enumerate(hormonal_conditions):
            data = line_df.loc[line_df['Condition'] == cond, pc]
            box = panel.boxplot(data, positions=[i], patch_artist=True, widths=.8)
            for patch in box['boxes']:
                patch.set_facecolor(color_palette[cond])
            box['medians'][0].set_color('black')

        # Reference line at the DMSO median of this line. It does not depend
        # on the condition being drawn, so it is computed and drawn once.
        dmso_median = line_df.loc[line_df['Condition'] == 'DMSO', pc].median()
        panel.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

        # Overlay the replicate means. `order` matches the box positions
        # 0..n-1, so a single call places every point on its own box.
        sns.stripplot(
            data=line_df,
            y=pc, x='Condition', order=hormonal_conditions,
            color='black', alpha=0.5, jitter=False, ax=panel, size=8
        )

        panel.set_xticks(range(len(hormonal_conditions)))
        panel.set_xticklabels(hormonal_conditions, rotation=90)
        panel.set_title(line)
        panel.set_ylabel(pc)

    fig.savefig(f'../../endpoints_figures/endpoints_{pc}_grouped.pdf', dpi=300, bbox_inches='tight')
    fig.savefig(f'../../endpoints_figures/endpoints_{pc}_grouped.png', dpi=300, bbox_inches='tight')
import matplotlib.pyplot as plt

# Signed loadings of the first three components, largest first. These
# replace the earlier top-5 absolute-value selections of the same names.
important_features_pca1 = pca_components.loc['PC1'].sort_values(ascending=False)
important_features_pca2 = pca_components.loc['PC2'].sort_values(ascending=False)
important_features_pca3 = pca_components.loc['PC3'].sort_values(ascending=False)

# One lollipop (stem) plot per component, saved as important_features_pca<k>.pdf.
for component_number, loadings in enumerate(
        (important_features_pca1, important_features_pca2, important_features_pca3), start=1):
    plt.figure(figsize=(5, 6))
    x = loadings.index
    y = loadings.values
    plt.stem(x, y, basefmt=" ")
    plt.xticks(rotation=90)
    plt.ylabel(f'Contribution to PC{component_number}')
    plt.tight_layout()
    plt.savefig(f'../../endpoints_figures/important_features_pca{component_number}.pdf', dpi=300, bbox_inches='tight')
Main contributor of variance for PC1: equivalent_diameter
# Box plots of `equivalent_diameter` per condition and line, drawn twice:
# once from every object in `tot_df_morphological` ("all_nuclei") and once
# from the per-replicate means in `grouped_df` ("grouped", with the replicate
# means overlaid as points). The two figures shared copy-pasted stanzas; they
# are generated in a loop here.
feature = 'equivalent_diameter'
for source_df, suffix, overlay_points in (
        (tot_df_morphological, 'all_nuclei', False),
        (grouped_df, 'grouped', True)):
    fig, ax = plt.subplots(1, 2, figsize=(20, 5))
    for panel, line in zip(ax, ('CTL08A', 'CTL04E')):
        # Restrict once to the rows/columns this panel needs.
        line_df = source_df.loc[source_df['Line'] == line, ['Condition', feature]]

        for i, cond in enumerate(hormonal_conditions):
            data = line_df.loc[line_df['Condition'] == cond, feature]
            box = panel.boxplot(data, positions=[i], patch_artist=True, widths=.8)
            for patch in box['boxes']:
                patch.set_facecolor(color_palette[cond])
            box['medians'][0].set_color('black')

        # Reference line at the DMSO median of this line. It does not depend
        # on the condition being drawn, so it is computed and drawn once.
        dmso_median = line_df.loc[line_df['Condition'] == 'DMSO', feature].median()
        panel.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

        if overlay_points:
            # `order` matches the box positions 0..n-1, so a single call
            # places every replicate mean on its own box.
            sns.stripplot(
                data=line_df,
                y=feature, x='Condition', order=hormonal_conditions,
                color='black', alpha=0.5, jitter=False, ax=panel, size=8
            )

        panel.set_xticks(range(len(hormonal_conditions)))
        panel.set_xticklabels(hormonal_conditions, rotation=90)
        panel.set_title(line)
        panel.set_ylabel(feature)

    fig.savefig(f'../../endpoints_figures/endpoints_{feature}_{suffix}.pdf', dpi=300, bbox_inches='tight')
    fig.savefig(f'../../endpoints_figures/endpoints_{feature}_{suffix}.png', dpi=300, bbox_inches='tight')
Main contributor of variance for PC2: extent
# Box plots of `extent` per condition and line, drawn twice: once from every
# object in `tot_df_morphological` ("all_nuclei") and once from the
# per-replicate means in `grouped_df` ("grouped", with the replicate means
# overlaid as points). The two figures shared copy-pasted stanzas; they are
# generated in a loop here.
feature = 'extent'
for source_df, suffix, overlay_points in (
        (tot_df_morphological, 'all_nuclei', False),
        (grouped_df, 'grouped', True)):
    fig, ax = plt.subplots(1, 2, figsize=(20, 5))
    for panel, line in zip(ax, ('CTL08A', 'CTL04E')):
        # Restrict once to the rows/columns this panel needs.
        line_df = source_df.loc[source_df['Line'] == line, ['Condition', feature]]

        for i, cond in enumerate(hormonal_conditions):
            data = line_df.loc[line_df['Condition'] == cond, feature]
            box = panel.boxplot(data, positions=[i], patch_artist=True, widths=.8)
            for patch in box['boxes']:
                patch.set_facecolor(color_palette[cond])
            box['medians'][0].set_color('black')

        # Reference line at the DMSO median of this line. It does not depend
        # on the condition being drawn, so it is computed and drawn once.
        dmso_median = line_df.loc[line_df['Condition'] == 'DMSO', feature].median()
        panel.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

        if overlay_points:
            # Single overlay call with the full condition order, as in the PC
            # and equivalent_diameter figures. The previous per-condition
            # calls used `order=[cond]`, which does not pin a point's
            # x-position to the index of its box.
            sns.stripplot(
                data=line_df,
                y=feature, x='Condition', order=hormonal_conditions,
                color='black', alpha=0.5, jitter=False, ax=panel, size=8
            )

        panel.set_xticks(range(len(hormonal_conditions)))
        panel.set_xticklabels(hormonal_conditions, rotation=90)
        panel.set_title(line)
        panel.set_ylabel(feature)

    fig.savefig(f'../../endpoints_figures/endpoints_{feature}_{suffix}.pdf', dpi=300, bbox_inches='tight')
    fig.savefig(f'../../endpoints_figures/endpoints_{feature}_{suffix}.png', dpi=300, bbox_inches='tight')
Additional contributor of variance for PC2: eccentricity
# Box plots of `eccentricity` per condition and line, drawn twice: once from
# every object in `tot_df_morphological` ("all_nuclei") and once from the
# per-replicate means in `grouped_df` ("grouped", with the replicate means
# overlaid as points). The two figures shared copy-pasted stanzas; they are
# generated in a loop here.
feature = 'eccentricity'
for source_df, suffix, overlay_points in (
        (tot_df_morphological, 'all_nuclei', False),
        (grouped_df, 'grouped', True)):
    fig, ax = plt.subplots(1, 2, figsize=(20, 5))
    for panel, line in zip(ax, ('CTL08A', 'CTL04E')):
        # Restrict once to the rows/columns this panel needs.
        line_df = source_df.loc[source_df['Line'] == line, ['Condition', feature]]

        for i, cond in enumerate(hormonal_conditions):
            data = line_df.loc[line_df['Condition'] == cond, feature]
            box = panel.boxplot(data, positions=[i], patch_artist=True, widths=.8)
            for patch in box['boxes']:
                patch.set_facecolor(color_palette[cond])
            box['medians'][0].set_color('black')

        # Reference line at the DMSO median of this line. It does not depend
        # on the condition being drawn, so it is computed and drawn once.
        dmso_median = line_df.loc[line_df['Condition'] == 'DMSO', feature].median()
        panel.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

        if overlay_points:
            # Single overlay call with the full condition order, as in the PC
            # and equivalent_diameter figures. The previous per-condition
            # calls used `order=[cond]`, which does not pin a point's
            # x-position to the index of its box.
            sns.stripplot(
                data=line_df,
                y=feature, x='Condition', order=hormonal_conditions,
                color='black', alpha=0.5, jitter=False, ax=panel, size=8
            )

        panel.set_xticks(range(len(hormonal_conditions)))
        panel.set_xticklabels(hormonal_conditions, rotation=90)
        panel.set_title(line)
        panel.set_ylabel(feature)

    fig.savefig(f'../../endpoints_figures/endpoints_{feature}_{suffix}.pdf', dpi=300, bbox_inches='tight')
    fig.savefig(f'../../endpoints_figures/endpoints_{feature}_{suffix}.png', dpi=300, bbox_inches='tight')
Main contributor of variance for PC3: orientation
# Box plots of the per-object `orientation` values, one panel per line
# (CTL08A left, CTL04E right) and one box per entry of `hormonal_conditions`.
# The two panels were copy-pasted stanzas; they are drawn in a loop here.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))
for panel, line in zip(ax, ('CTL08A', 'CTL04E')):
    # Restrict once to the rows/columns this panel needs.
    line_df = tot_df_morphological.loc[tot_df_morphological['Line'] == line, ['Condition', 'orientation']]

    for i, cond in enumerate(hormonal_conditions):
        data = line_df.loc[line_df['Condition'] == cond, 'orientation']
        box = panel.boxplot(data, positions=[i], patch_artist=True, widths=.8)
        for patch in box['boxes']:
            patch.set_facecolor(color_palette[cond])
        box['medians'][0].set_color('black')

    # Reference line at the DMSO median of this line. It does not depend on
    # the condition being drawn, so it is computed and drawn once.
    dmso_median = line_df.loc[line_df['Condition'] == 'DMSO', 'orientation'].median()
    panel.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

    panel.set_xticks(range(len(hormonal_conditions)))
    panel.set_xticklabels(hormonal_conditions, rotation=90)
    panel.set_title(line)
    panel.set_ylabel('orientation')

fig.savefig('../../endpoints_figures/endpoints_orientation_all_nuclei.pdf', dpi=300, bbox_inches='tight')
fig.savefig('../../endpoints_figures/endpoints_orientation_all_nuclei.png', dpi=300, bbox_inches='tight')
# Box plots of 'orientation' from grouped_df for every hormonal condition,
# one panel per cell line, with the individual grouped values overlaid.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))
for axis, line in zip(ax, ('CTL08A', 'CTL04E')):
    line_df = grouped_df[grouped_df['Line'] == line]
    for i, cond in enumerate(hormonal_conditions):
        data = line_df[line_df['Condition'] == cond]['orientation']
        box = axis.boxplot(data, positions=[i], patch_artist=True, widths=.8)
        for patch in box['boxes']:
            patch.set_facecolor(color_palette[cond])
        box['medians'][0].set_color('black')
    # DMSO median of this cell line, drawn once as the reference level.
    dmso_median = line_df[line_df['Condition'] == 'DMSO']['orientation'].median()
    axis.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)
    # Overlay the individual points with the full condition order so each
    # condition lands on its own box. Calling stripplot per condition with
    # order=[cond] maps every condition to categorical position 0, i.e. all
    # points end up on the first box.
    sns.stripplot(
        data=line_df,
        y='orientation', x='Condition', order=hormonal_conditions,
        color='black', alpha=0.5, jitter=False, ax=axis, size=8,
    )
    _ = axis.set_xticks(range(len(hormonal_conditions)))
    _ = axis.set_xticklabels(hormonal_conditions, rotation=90)
    axis.set_title(line)
    axis.set_ylabel('orientation')
fig.savefig('../../endpoints_figures/endpoints_orientation_grouped.pdf', dpi=300, bbox_inches='tight')
fig.savefig('../../endpoints_figures/endpoints_orientation_grouped.png', dpi=300, bbox_inches='tight')
# Convert area to um^2 by scaling with 0.325 ** 2
# (0.325 is presumably the pixel size in um per pixel -- TODO confirm).
tot_df_morphological['area_um2'] = tot_df_morphological['area'] * (0.325 ** 2)

# Box plots of the per-nucleus 'area_um2' for every hormonal condition,
# one panel per cell line (left: CTL08A, right: CTL04E).
fig, ax = plt.subplots(1, 2, figsize=(20, 5))
for axis, line in zip(ax, ('CTL08A', 'CTL04E')):
    line_df = tot_df_morphological[tot_df_morphological['Line'] == line]
    for i, cond in enumerate(hormonal_conditions):
        data = line_df[line_df['Condition'] == cond]['area_um2']
        box = axis.boxplot(data, positions=[i], patch_artist=True, widths=.8)
        for patch in box['boxes']:
            patch.set_facecolor(color_palette[cond])
        box['medians'][0].set_color('black')
    # The DMSO median does not depend on the condition being drawn, so the
    # reference line is computed and drawn once per panel rather than once
    # per box.
    dmso_median = line_df[line_df['Condition'] == 'DMSO']['area_um2'].median()
    axis.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)
    _ = axis.set_xticks(range(len(hormonal_conditions)))
    _ = axis.set_xticklabels(hormonal_conditions, rotation=90)
    axis.set_title(line)
    axis.set_ylabel('area_um2')
fig.savefig('../../endpoints_figures/endpoints_area_um2_all_nuclei.pdf', dpi=300, bbox_inches='tight')
fig.savefig('../../endpoints_figures/endpoints_area_um2_all_nuclei.png', dpi=300, bbox_inches='tight')
# Convert area to um^2 by scaling with 0.325 ** 2
# (0.325 is presumably the pixel size in um per pixel -- TODO confirm).
grouped_df['area_um2'] = grouped_df['area'] * (0.325 ** 2)

# Box plots of 'area_um2' from grouped_df for every hormonal condition,
# one panel per cell line, with the individual grouped values overlaid.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))
for axis, line in zip(ax, ('CTL08A', 'CTL04E')):
    line_df = grouped_df[grouped_df['Line'] == line]
    for i, cond in enumerate(hormonal_conditions):
        data = line_df[line_df['Condition'] == cond]['area_um2']
        box = axis.boxplot(data, positions=[i], patch_artist=True, widths=.8)
        for patch in box['boxes']:
            patch.set_facecolor(color_palette[cond])
        box['medians'][0].set_color('black')
    # Neither the DMSO reference line nor the point overlay depends on the
    # condition being drawn, so both are added once per panel; repeating them
    # per box would only stack identical semi-transparent artists.
    dmso_median = line_df[line_df['Condition'] == 'DMSO']['area_um2'].median()
    axis.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)
    sns.stripplot(
        data=line_df,
        y='area_um2', x='Condition', order=hormonal_conditions,
        color='black', alpha=0.5, jitter=False, ax=axis, size=8,
    )
    _ = axis.set_xticks(range(len(hormonal_conditions)))
    _ = axis.set_xticklabels(hormonal_conditions, rotation=90)
    axis.set_title(line)
    axis.set_ylabel('Nuclei area (um2)')
fig.savefig('../../endpoints_figures/endpoints_area_um2_grouped.pdf', dpi=300, bbox_inches='tight')
fig.savefig('../../endpoints_figures/endpoints_area_um2_grouped.png', dpi=300, bbox_inches='tight')
# Load the organoid area table; the first CSV column is used as the row index.
organoid_areas = pd.read_csv(
    filepath_or_buffer='/group/testa/Project/EndPoints/TPSSU/analysis/organoids_area.csv',
    index_col=0,
)
# Notebook display of the freshly loaded table.
organoid_areas
area | |
---|---|
20240424_manuel_lessi_0141_ScanRegion1_rep_0 | 48618374 |
20240424_manuel_lessi_0131_ScanRegion0_rep_2 | 36008220 |
20240424_manuel_lessi_0133_ScanRegion4_rep_0 | 8422522 |
20240424_manuel_lessi_0143_ScanRegion2_rep_1 | 61481135 |
20240424_manuel_lessi_0147_ScanRegion0_rep_1 | 162545379 |
... | ... |
20240424_manuel_lessi_0129_ScanRegion1_rep_0 | 26572184 |
20240424_manuel_lessi_0129_ScanRegion0_rep_0 | 9292198 |
20240424_manuel_lessi_0142_ScanRegion1_rep_2 | 38017446 |
20240424_manuel_lessi_0129_ScanRegion2_rep_1 | 29459387 |
20240424_manuel_lessi_0134_ScanRegion4_rep_1 | 22517768 |
280 rows × 1 columns
# Annotate every organoid with its scan region, cell line and condition, then
# keep only the hormonal conditions.

# The first five '_'-separated fields of the row label identify the scan
# region (the trailing '_rep_<n>' part is dropped). Reading the labels from
# the index directly works whether or not the index carries a name.
organoid_areas['ImageName_ScanRegion'] = organoid_areas.index.map(
    lambda label: '_'.join(label.split('_')[0:5])
)
# sample_dict maps a scan region to '<Condition>_<Line>'; scan regions that are
# not in the mapping become NaN.
organoid_areas['Condition_Line'] = organoid_areas['ImageName_ScanRegion'].map(sample_dict)
# The cell line is the last '_'-separated field; NaN entries stay NaN.
organoid_areas['Line'] = organoid_areas['Condition_Line'].str.split('_').str[-1]
# The condition is what remains once the cell-line suffix is removed. Both
# patterns are plain literals, so no regular expression is needed.
organoid_areas['Condition'] = (
    organoid_areas['Condition_Line']
    .str.replace('_CTL08A', '', regex=False)
    .str.replace('_CTL04E', '', regex=False)
)
# .copy() makes the filtered table an independent frame, so columns added to
# it later do not raise SettingWithCopyWarning.
organoid_areas = organoid_areas[organoid_areas['Condition'].isin(hormonal_conditions)].copy()
organoid_areas
area | ImageName_ScanRegion | Condition_Line | Line | Condition | |
---|---|---|---|---|---|
20240424_manuel_lessi_0131_ScanRegion0_rep_2 | 36008220 | 20240424_manuel_lessi_0131_ScanRegion0 | GC_INH_CTL04E | CTL04E | GC_INH |
20240424_manuel_lessi_0133_ScanRegion4_rep_0 | 8422522 | 20240424_manuel_lessi_0133_ScanRegion4 | RET_AG_CTL08A | CTL08A | RET_AG |
20240424_manuel_lessi_0140_ScanRegion4_rep_2 | 32680044 | 20240424_manuel_lessi_0140_ScanRegion4 | THYR_INH_CTL08A | CTL08A | THYR_INH |
20240424_manuel_lessi_0140_ScanRegion0_rep_1 | 36008064 | 20240424_manuel_lessi_0140_ScanRegion0 | GC_INH_CTL08A | CTL08A | GC_INH |
20240424_manuel_lessi_0138_ScanRegion3_rep_1 | 16957648 | 20240424_manuel_lessi_0138_ScanRegion3 | ARYL_AG_CTL08A | CTL08A | ARYL_AG |
... | ... | ... | ... | ... | ... |
20240424_manuel_lessi_0131_ScanRegion4_rep_1 | 35725975 | 20240424_manuel_lessi_0131_ScanRegion4 | THYR_AG_CTL04E | CTL04E | THYR_AG |
20240424_manuel_lessi_0140_ScanRegion3_rep_0 | 46296832 | 20240424_manuel_lessi_0140_ScanRegion3 | THYR_AG_CTL04E | CTL04E | THYR_AG |
20240424_manuel_lessi_0128_ScanRegion4_rep_0 | 42198648 | 20240424_manuel_lessi_0128_ScanRegion4 | ESTR_INH_CTL08A | CTL08A | ESTR_INH |
20240424_manuel_lessi_0129_ScanRegion1_rep_0 | 26572184 | 20240424_manuel_lessi_0129_ScanRegion1 | ARYL_INH_CTL08A | CTL08A | ARYL_INH |
20240424_manuel_lessi_0129_ScanRegion2_rep_1 | 29459387 | 20240424_manuel_lessi_0129_ScanRegion2 | ARYL_INH_CTL04E | CTL04E | ARYL_INH |
146 rows × 5 columns
# Convert the organoid area to um^2 by scaling with 0.325 ** 2
# (0.325 is presumably the pixel size in um per pixel -- TODO confirm).
organoid_areas['area_um2'] = organoid_areas['area'].mul(0.325 ** 2)
/tmp/ipykernel_3750874/172599718.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy organoid_areas['area_um2'] = organoid_areas['area'] * (0.325 ** 2) # Convert area to um^2
# Sum the nuclei areas (um^2) of every image/scan-region/replicate.
summed_area = (
    tot_df_morphological
    .loc[:, ['ImageName_ScanRegion_Rep', 'area_um2']]
    .groupby(by='ImageName_ScanRegion_Rep')
    .sum()
)
# summed_area is a single-column frame indexed by 'ImageName_ScanRegion_Rep';
# pandas aligns it on the row labels of organoid_areas when assigning.
organoid_areas['tot_nuclei_areas'] = summed_area
/tmp/ipykernel_3750874/1334222756.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy organoid_areas['tot_nuclei_areas'] = summed_area
# Summed nuclei area expressed as a percentage of the organoid area
# (both operands are in um^2).
organoid_areas['nuclear_area'] = (
    organoid_areas['tot_nuclei_areas']
    .div(organoid_areas['area_um2'])
    .mul(100)
)
/tmp/ipykernel_3750874/3125954352.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy organoid_areas['nuclear_area'] = organoid_areas['tot_nuclei_areas'] / organoid_areas['area_um2'] * 100
# Notebook display of organoid_areas to inspect its current columns.
organoid_areas
area | ImageName_ScanRegion | Condition_Line | Line | Condition | area_um2 | tot_nuclei_areas | nuclear_area | |
---|---|---|---|---|---|---|---|---|
20240424_manuel_lessi_0131_ScanRegion0_rep_2 | 36008220 | 20240424_manuel_lessi_0131_ScanRegion0 | GC_INH_CTL04E | CTL04E | GC_INH | 3.803368e+06 | 1.145734e+06 | 30.124186 |
20240424_manuel_lessi_0133_ScanRegion4_rep_0 | 8422522 | 20240424_manuel_lessi_0133_ScanRegion4 | RET_AG_CTL08A | CTL08A | RET_AG | 8.896289e+05 | 2.439548e+05 | 27.422083 |
20240424_manuel_lessi_0140_ScanRegion4_rep_2 | 32680044 | 20240424_manuel_lessi_0140_ScanRegion4 | THYR_INH_CTL08A | CTL08A | THYR_INH | 3.451830e+06 | 1.302871e+06 | 37.744374 |
20240424_manuel_lessi_0140_ScanRegion0_rep_1 | 36008064 | 20240424_manuel_lessi_0140_ScanRegion0 | GC_INH_CTL08A | CTL08A | GC_INH | 3.803352e+06 | 1.457129e+06 | 38.311710 |
20240424_manuel_lessi_0138_ScanRegion3_rep_1 | 16957648 | 20240424_manuel_lessi_0138_ScanRegion3 | ARYL_AG_CTL08A | CTL08A | ARYL_AG | 1.791152e+06 | 6.871573e+05 | 38.363994 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
20240424_manuel_lessi_0131_ScanRegion4_rep_1 | 35725975 | 20240424_manuel_lessi_0131_ScanRegion4 | THYR_AG_CTL04E | CTL04E | THYR_AG | 3.773556e+06 | 1.162948e+06 | 30.818358 |
20240424_manuel_lessi_0140_ScanRegion3_rep_0 | 46296832 | 20240424_manuel_lessi_0140_ScanRegion3 | THYR_AG_CTL04E | CTL04E | THYR_AG | 4.890103e+06 | 1.751093e+06 | 35.808912 |
20240424_manuel_lessi_0128_ScanRegion4_rep_0 | 42198648 | 20240424_manuel_lessi_0128_ScanRegion4 | ESTR_INH_CTL08A | CTL08A | ESTR_INH | 4.457232e+06 | 1.589967e+06 | 35.671624 |
20240424_manuel_lessi_0129_ScanRegion1_rep_0 | 26572184 | 20240424_manuel_lessi_0129_ScanRegion1 | ARYL_INH_CTL08A | CTL08A | ARYL_INH | 2.806687e+06 | 1.140692e+06 | 40.641951 |
20240424_manuel_lessi_0129_ScanRegion2_rep_1 | 29459387 | 20240424_manuel_lessi_0129_ScanRegion2 | ARYL_INH_CTL04E | CTL04E | ARYL_INH | 3.111648e+06 | 1.188860e+06 | 38.206776 |
146 rows × 8 columns
# Box plots of 'nuclear_area' (summed nuclei area as a percentage of the
# organoid area) for every hormonal condition, one panel per cell line, with
# the individual organoids overlaid. This figure is not saved to disk.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))
for axis, line in zip(ax, ('CTL08A', 'CTL04E')):
    line_df = organoid_areas[organoid_areas['Line'] == line]
    for i, cond in enumerate(hormonal_conditions):
        # 'nuclear_area' derives from an index-aligned assignment, so organoids
        # without a matching nuclei total may hold NaN. matplotlib's box
        # statistics do not ignore NaN, hence they are dropped here; the
        # median and the strip plot below already skip them.
        data = line_df[line_df['Condition'] == cond]['nuclear_area'].dropna()
        box = axis.boxplot(data, positions=[i], patch_artist=True, widths=.8)
        for patch in box['boxes']:
            patch.set_facecolor(color_palette[cond])
        box['medians'][0].set_color('black')
    # Neither the DMSO reference line nor the point overlay depends on the
    # condition being drawn, so both are added once per panel.
    dmso_median = line_df[line_df['Condition'] == 'DMSO']['nuclear_area'].median()
    axis.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)
    sns.stripplot(
        data=line_df,
        y='nuclear_area', x='Condition', order=hormonal_conditions,
        color='black', alpha=0.5, jitter=False, ax=axis, size=8,
    )
    _ = axis.set_xticks(range(len(hormonal_conditions)))
    _ = axis.set_xticklabels(hormonal_conditions, rotation=90)
    axis.set_title(line)
    axis.set_ylabel('nuclear_area')
Text(0, 0.5, 'nuclear_area')
# Collect the channel-2 measurement tables of the mix-1 images into a single
# frame, tagging every row with the file it came from.
measurements_files = glob.glob("/group/testa/Project/EndPoints/TPSSU/analysis/measurements/*.csv")
# Keep the files whose path contains one of the mix-1 image names ...
measurements_files_mix_1 = [f for f in measurements_files if any(name in f for name in mix_1_names)]
# ... and, among those, the channel-2 measurements.
measurements_files_mix_1_ch2 = [f for f in measurements_files_mix_1 if 'channel_2' in f]

# Gather the per-file frames and concatenate once at the end: concatenating
# inside the loop re-copies the whole accumulated table for every file.
per_file_frames = []
for file in measurements_files_mix_1_ch2:
    print(file)
    df = pd.read_csv(file, index_col=0)
    # File name without directory and without extension.
    df['ImageName'] = file.split("/")[-1].split(".")[0]
    print(df.shape)
    per_file_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# file matched.
tot_df_mix1_ch2 = pd.concat(per_file_frames, axis=0) if per_file_frames else pd.DataFrame()
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion3_rep_1_channel_2_measures.csv (14032, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion2_rep_0_channel_2_measures.csv (36257, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion3_rep_1_channel_2_measures.csv (29165, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion2_rep_0_channel_2_measures.csv (28234, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion1_rep_2_channel_2_measures.csv (5186, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion2_rep_1_channel_2_measures.csv (30598, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion0_rep_0_channel_2_measures.csv (4420, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion4_rep_0_channel_2_measures.csv (5637, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion5_rep_2_channel_2_measures.csv (44878, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion2_rep_1_channel_2_measures.csv (25453, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion5_rep_0_channel_2_measures.csv (14547, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion2_rep_0_channel_2_measures.csv (65200, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion4_rep_1_channel_2_measures.csv (18423, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion0_rep_2_channel_2_measures.csv (12038, 19) 
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion0_rep_1_channel_2_measures.csv (52451, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion0_rep_0_channel_2_measures.csv (69681, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion3_rep_0_channel_2_measures.csv (60277, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion7_rep_0_channel_2_measures.csv (36423, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion2_rep_1_channel_2_measures.csv (54724, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion3_rep_2_channel_2_measures.csv (48396, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion1_rep_0_channel_2_measures.csv (18479, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion5_rep_0_channel_2_measures.csv (61473, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion3_rep_2_channel_2_measures.csv (62393, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion5_rep_2_channel_2_measures.csv (27998, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion4_rep_0_channel_2_measures.csv (16418, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion0_rep_0_channel_2_measures.csv (54097, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion0_rep_1_channel_2_measures.csv (19128, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion2_rep_2_channel_2_measures.csv (7446, 19) 
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion0_rep_0_channel_2_measures.csv (6943, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion3_rep_2_channel_2_measures.csv (44660, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion2_rep_0_channel_2_measures.csv (6134, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion2_rep_0_channel_2_measures.csv (43655, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion5_rep_0_channel_2_measures.csv (55824, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion4_rep_0_channel_2_measures.csv (43161, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion4_rep_2_channel_2_measures.csv (26186, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion5_rep_2_channel_2_measures.csv (42766, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion1_rep_0_channel_2_measures.csv (32479, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion0_rep_2_channel_2_measures.csv (46684, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion1_rep_1_channel_2_measures.csv (7889, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion7_rep_1_channel_2_measures.csv (1, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion3_rep_1_channel_2_measures.csv (4878, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion5_rep_1_channel_2_measures.csv (24682, 19) 
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion3_rep_1_channel_2_measures.csv (31993, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion3_rep_2_channel_2_measures.csv (48939, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion4_rep_2_channel_2_measures.csv (1405, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion0_rep_2_channel_2_measures.csv (50003, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion4_rep_1_channel_2_measures.csv (26358, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion1_rep_1_channel_2_measures.csv (4685, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion0_rep_1_channel_2_measures.csv (42864, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion4_rep_1_channel_2_measures.csv (12551, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion1_rep_0_channel_2_measures.csv (128250, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion2_rep_2_channel_2_measures.csv (68825, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion0_rep_1_channel_2_measures.csv (10615, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion2_rep_2_channel_2_measures.csv (54944, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion2_rep_1_channel_2_measures.csv (32605, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion2_rep_0_channel_2_measures.csv (23699, 19) 
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion3_rep_1_channel_2_measures.csv (20919, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion4_rep_2_channel_2_measures.csv (33055, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion3_rep_0_channel_2_measures.csv (70503, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion1_rep_0_channel_2_measures.csv (12349, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion1_rep_1_channel_2_measures.csv (30782, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion6_rep_0_channel_2_measures.csv (22787, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion4_rep_0_channel_2_measures.csv (42538, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion5_rep_1_channel_2_measures.csv (31370, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion3_rep_0_channel_2_measures.csv (4465, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion0_rep_1_channel_2_measures.csv (24746, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion1_rep_1_channel_2_measures.csv (5061, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion5_rep_2_channel_2_measures.csv (45127, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion1_rep_1_channel_2_measures.csv (12207, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion5_rep_0_channel_2_measures.csv (36668, 19) 
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion4_rep_0_channel_2_measures.csv (22478, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion0_rep_0_channel_2_measures.csv (75915, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion4_rep_0_channel_2_measures.csv (881, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion1_rep_2_channel_2_measures.csv (25173, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion0_rep_1_channel_2_measures.csv (9836, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion7_rep_2_channel_2_measures.csv (27770, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion5_rep_0_channel_2_measures.csv (52956, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion2_rep_2_channel_2_measures.csv (55563, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion1_rep_1_channel_2_measures.csv (92340, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion2_rep_1_channel_2_measures.csv (2185, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion2_rep_2_channel_2_measures.csv (13307, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion3_rep_0_channel_2_measures.csv (47582, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion0_rep_2_channel_2_measures.csv (1122, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion1_rep_0_channel_2_measures.csv (5646, 19) 
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion6_rep_1_channel_2_measures.csv (22272, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion5_rep_1_channel_2_measures.csv (59825, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion3_rep_0_channel_2_measures.csv (11258, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion4_rep_2_channel_2_measures.csv (6712, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion1_rep_0_channel_2_measures.csv (26007, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion7_rep_3_channel_2_measures.csv (72679, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion2_rep_1_channel_2_measures.csv (17407, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion6_rep_2_channel_2_measures.csv (4246, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion5_rep_1_channel_2_measures.csv (79266, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion4_rep_1_channel_2_measures.csv (10935, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion2_rep_0_channel_2_measures.csv (24890, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion0_rep_1_channel_2_measures.csv (24601, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion5_rep_2_channel_2_measures.csv (58504, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion2_rep_2_channel_2_measures.csv (7832, 19) 
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion2_rep_1_channel_2_measures.csv (194410, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion1_rep_0_channel_2_measures.csv (2649, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion0_rep_2_channel_2_measures.csv (35739, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion5_rep_1_channel_2_measures.csv (72165, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion4_rep_0_channel_2_measures.csv (79123, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion4_rep_1_channel_2_measures.csv (17120, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion3_rep_0_channel_2_measures.csv (43389, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion3_rep_2_channel_2_measures.csv (19504, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0131_ScanRegion4_rep_1_channel_2_measures.csv (38346, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0128_ScanRegion0_rep_0_channel_2_measures.csv (70479, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0134_ScanRegion3_rep_0_channel_2_measures.csv (50144, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion3_rep_2_channel_2_measures.csv (1195, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion3_rep_1_channel_2_measures.csv (54110, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0133_ScanRegion4_rep_1_channel_2_measures.csv (4832, 19) 
/group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0132_ScanRegion3_rep_1_channel_2_measures.csv (39112, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0130_ScanRegion0_rep_0_channel_2_measures.csv (41176, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion3_rep_3_channel_2_measures.csv (9818, 19) /group/testa/Project/EndPoints/TPSSU/analysis/measurements/20240424_manuel_lessi_0129_ScanRegion3_rep_4_channel_2_measures.csv (23465, 19)
# Per-image quantification of the channel-2 signal (named SOX2 throughout;
# presumably the SOX2 staining -- confirm against the acquisition sheet).
#
# NOTE: tot_pos_SOX2 and n_nuclei are keyed by the file stem, which still ends
# in '_channel_2_measures'; the other dictionaries are keyed by the stem with
# that suffix removed. density_SOX2 only receives empty dictionaries here.
tot_pos_SOX2 = {}            # stem -> {'otsu_<i>': % of nuclei above threshold i}
density_SOX2 = {}            # image/scan-region/rep -> {} (not filled in this cell)
n_nuclei = {}                # stem -> number of nuclei kept after the area filter
n_nuclei_tot_pos_SOX2 = {}   # image/scan-region/rep -> {'otsu_<i>': count above threshold i}
tot_area_SOX2 = {}           # image/scan-region/rep -> summed area (um^2) above threshold 1
tot_area_no_SOX2 = {}        # image/scan-region/rep -> summed area (um^2) at or below threshold 1

# Scan regions listed in the sample sheet; built once for the membership test.
known_scan_regions = set(sample_sheet['ImageName_ScanRegion'].values)

for file in measurements_files_mix_1_ch2:
    #print(file)
    file_stem = file.split("/")[-1].split(".")[0]
    imageName_scanRegion_rep = file_stem.replace('_channel_2_measures', '')
    imageName_scanRegion = '_'.join(imageName_scanRegion_rep.split('_')[:5])
    # Decide from the file name alone whether the image is used, so that the
    # CSVs of discarded images are never parsed.
    if (imageName_scanRegion_rep not in good_tissue_to_keep_limit
            or imageName_scanRegion not in known_scan_regions):
        continue

    df = pd.read_csv(file, index_col=0)
    df['ImageName'] = file_stem
    tot_pos_SOX2[file_stem] = {}
    density_SOX2[imageName_scanRegion_rep] = {}
    n_nuclei_tot_pos_SOX2[imageName_scanRegion_rep] = {}

    df['area_um2'] = df['area'] * (0.325 ** 2)  # Convert area to um^2
    # Keep the nuclei strictly between the 1st and 99th area percentiles.
    min_quantile = np.quantile(df['area_um2'], 0.01)
    max_quantile = np.quantile(df['area_um2'], 0.99)
    df = df[(df['area_um2'] > min_quantile) & (df['area_um2'] < max_quantile)]
    n_nuclei[file_stem] = df.shape[0]

    # Multi-Otsu thresholds on the mean intensity of the remaining nuclei.
    otsu_thrs = filters.threshold_multiotsu(np.array(df['intensity_mean']), classes=4)
    for i, thr in enumerate(otsu_thrs):
        above_thr = df['intensity_mean'] > thr
        n_above = np.sum(above_thr)
        tot_pos_SOX2[file_stem][f'otsu_{i}'] = n_above / df.shape[0] * 100
        n_nuclei_tot_pos_SOX2[imageName_scanRegion_rep][f'otsu_{i}'] = n_above
        if i == 1:
            # The second threshold (index 1) splits the nuclei area into a
            # positive and a negative part.
            tot_area_SOX2[imageName_scanRegion_rep] = df[above_thr]['area_um2'].sum()
            tot_area_no_SOX2[imageName_scanRegion_rep] = df[df['intensity_mean'] <= thr]['area_um2'].sum()
# Attach the per-image SOX2-positive / SOX2-negative summed nuclear areas to the
# organoid table by looking up each index label in the corresponding dict;
# labels missing from the dict become NaN.
#
# `organoid_areas` was derived from a slice of another DataFrame, so assigning
# columns to it directly triggers pandas' SettingWithCopyWarning. Taking an
# explicit copy first makes the table independent and silences the warning
# without changing the resulting columns.
organoid_areas = organoid_areas.copy()
organoid_areas['SOX2_pos_area'] = organoid_areas.index.map(tot_area_SOX2)
organoid_areas['SOX2_neg_area'] = organoid_areas.index.map(tot_area_no_SOX2)
/tmp/ipykernel_3750874/4008132345.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy organoid_areas['SOX2_pos_area'] = organoid_areas.index.map(tot_area_SOX2) /tmp/ipykernel_3750874/4008132345.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy organoid_areas['SOX2_neg_area'] = organoid_areas.index.map(tot_area_no_SOX2)
# Express the SOX2-positive and SOX2-negative nuclear areas as a percentage of
# each row's `area_um2`.
#
# `DataFrame.assign` returns a new frame, so the columns are not written into a
# frame that pandas tracks as a slice of another one (the source of the
# SettingWithCopyWarning the direct column assignment produced).
organoid_areas = organoid_areas.assign(
    SOX2_pos_area_over_tot=organoid_areas['SOX2_pos_area'] / organoid_areas['area_um2'] * 100,
    SOX2_neg_area_over_tot=organoid_areas['SOX2_neg_area'] / organoid_areas['area_um2'] * 100,
)
/tmp/ipykernel_3750874/1300664090.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy organoid_areas['SOX2_pos_area_over_tot'] = organoid_areas['SOX2_pos_area'] / organoid_areas['area_um2'] * 100 /tmp/ipykernel_3750874/1300664090.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy organoid_areas['SOX2_neg_area_over_tot'] = organoid_areas['SOX2_neg_area'] / organoid_areas['area_um2'] * 100
# Two-panel figure of 'nuclear_area' per condition: left panel CTL08A, right
# panel CTL04E. Each panel gets one palette-coloured box per condition, a dashed
# red line at that line's DMSO median, and the individual points overlaid.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))

# Keep only rows for which a SOX2-positive area was computed.
organoid_area_mix1 = organoid_areas[organoid_areas['SOX2_pos_area'].notna()]

for panel_ax, line_name in zip(ax, ('CTL08A', 'CTL04E')):
    line_rows = organoid_area_mix1[organoid_area_mix1['Line'] == line_name]

    # One box per condition, drawn at the condition's position in hormonal_conditions.
    for i, cond in enumerate(hormonal_conditions):
        data = line_rows[line_rows['Condition'] == cond]['nuclear_area']
        box = panel_ax.boxplot(data, positions=[i], patch_artist=True, widths=.8)
        for patch in box['boxes']:
            patch.set_facecolor(color_palette[cond])
        box['medians'][0].set_color('black')

    # Reference line: median of the DMSO rows for this line.
    dmso_median = line_rows[line_rows['Condition'] == 'DMSO']['nuclear_area'].median()
    panel_ax.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

    # Individual observations on top of the boxes (no jitter).
    sns.stripplot(
        data=line_rows,
        y='nuclear_area', x='Condition', order=hormonal_conditions,
        color='black', alpha=0.5, jitter=False, ax=panel_ax, size=8
    )

    _ = panel_ax.set_xticks(range(len(hormonal_conditions)))
    _ = panel_ax.set_xticklabels(hormonal_conditions, rotation=90)
    panel_ax.set_title(line_name)
    panel_ax.set_ylabel('nuclear_area/organoid area')
Text(0, 0.5, 'nuclear_area/organoid area')
# Two-panel figure of 'SOX2_pos_area_over_tot' per condition: left panel CTL08A,
# right panel CTL04E. Each panel gets one palette-coloured box per condition, a
# dashed red line at that line's DMSO median, and the individual points overlaid.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))

# Keep only rows for which a SOX2-positive area was computed.
organoid_area_mix1 = organoid_areas[organoid_areas['SOX2_pos_area'].notna()]

for panel_ax, line_name in zip(ax, ('CTL08A', 'CTL04E')):
    line_rows = organoid_area_mix1[organoid_area_mix1['Line'] == line_name]

    # One box per condition, drawn at the condition's position in hormonal_conditions.
    for i, cond in enumerate(hormonal_conditions):
        data = line_rows[line_rows['Condition'] == cond]['SOX2_pos_area_over_tot']
        box = panel_ax.boxplot(data, positions=[i], patch_artist=True, widths=.8)
        for patch in box['boxes']:
            patch.set_facecolor(color_palette[cond])
        box['medians'][0].set_color('black')

    # Reference line: median of the DMSO rows for this line.
    dmso_median = line_rows[line_rows['Condition'] == 'DMSO']['SOX2_pos_area_over_tot'].median()
    panel_ax.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

    # Individual observations on top of the boxes (no jitter).
    sns.stripplot(
        data=line_rows,
        y='SOX2_pos_area_over_tot', x='Condition', order=hormonal_conditions,
        color='black', alpha=0.5, jitter=False, ax=panel_ax, size=8
    )

    _ = panel_ax.set_xticks(range(len(hormonal_conditions)))
    _ = panel_ax.set_xticklabels(hormonal_conditions, rotation=90)
    panel_ax.set_title(line_name)
    panel_ax.set_ylabel('SOX2_pos_area_over_tot')
Text(0, 0.5, 'SOX2_pos_area_over_tot')
# Two-panel figure of 'SOX2_neg_area_over_tot' per condition: left panel CTL08A,
# right panel CTL04E. Each panel gets one palette-coloured box per condition, a
# dashed red line at that line's DMSO median, and the individual points overlaid.
# NOTE: relies on `organoid_area_mix1` already existing; it is not rebuilt here.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))

for panel_ax, line_name in zip(ax, ('CTL08A', 'CTL04E')):
    line_rows = organoid_area_mix1[organoid_area_mix1['Line'] == line_name]

    # One box per condition, drawn at the condition's position in hormonal_conditions.
    for i, cond in enumerate(hormonal_conditions):
        data = line_rows[line_rows['Condition'] == cond]['SOX2_neg_area_over_tot']
        box = panel_ax.boxplot(data, positions=[i], patch_artist=True, widths=.8)
        for patch in box['boxes']:
            patch.set_facecolor(color_palette[cond])
        box['medians'][0].set_color('black')

    # Reference line: median of the DMSO rows for this line.
    dmso_median = line_rows[line_rows['Condition'] == 'DMSO']['SOX2_neg_area_over_tot'].median()
    panel_ax.axhline(dmso_median, color='red', linestyle='--', linewidth=.7, alpha=0.7)

    # Individual observations on top of the boxes (no jitter).
    sns.stripplot(
        data=line_rows,
        y='SOX2_neg_area_over_tot', x='Condition', order=hormonal_conditions,
        color='black', alpha=0.5, jitter=False, ax=panel_ax, size=8
    )

    _ = panel_ax.set_xticks(range(len(hormonal_conditions)))
    _ = panel_ax.set_xticklabels(hormonal_conditions, rotation=90)
    panel_ax.set_title(line_name)
    panel_ax.set_ylabel('SOX2_neg_area_over_tot')
Text(0, 0.5, 'SOX2_neg_area_over_tot')