##############
## input: combined_scRNA_seq_data.h5ad
## output: combined_scRNA_seq_data_with_genetic_demux_filtered.h5ad
##############


import scanpy as sc
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import anndata as ad


output_dir = "/group/testa/Project/EndPoints/scRNASeq/preAnalysis/output"

# Directory containing aggregatedCall files
demux_dir = '/group/testa/Common/scRNAseq/demultiplexing/endpoints_sc_2024-cellranger_human_GRCh38-2020-A/alessia.valenti/'


adata = sc.read_h5ad("/group/testa/Project/EndPoints/scRNASeq/preAnalysis/Archive/combined_scRNA_seq_data_39X.h5ad")
print(f"Loaded dataset shape: {adata.shape}")

Loaded dataset shape: (621573, 36601)


adata_original = adata.copy()


adata.obs['run'] = adata.obs['run'].astype(int)


adata.obs.head()


print( set([393, 394, *[i for i in range(491, 513)] ]) - set(sorted(list(adata.obs.run.unique()))))

set()


fig, axes = plt.subplots(2, 2, figsize=(12, 8))

axes = axes.flatten()

columns = ['run', 'line', 'condition', 'replicate']

for ax, column in zip(axes, columns):
    adata.obs[column].value_counts().plot.bar(ax=ax, color='gray')
    ax.set_title(column)
    ax.set_ylabel('Count')
    ax.tick_params(axis='x', rotation=90)

plt.tight_layout()
plt.show()


# Function to read aggregatedCall files
def read_aggregated_call(file_path):
    return pd.read_csv(file_path, sep='\t', index_col='barcode')


# Read all aggregatedCall files
aggregatedCalls = {}
for run_dir in glob.glob(os.path.join(demux_dir, 'MTR_*')):
    run_name = os.path.basename(run_dir)
    aggregated_call_file = os.path.join(run_dir, 'aggregatedCall', 'aggregatedCall.tsv')
    if os.path.exists(aggregated_call_file):
        aggregatedCalls[run_name] = read_aggregated_call(aggregated_call_file)
        aggregatedCalls[run_name]["run"] = int(run_name.split("_")[-1])


print(sorted(list(aggregatedCalls.keys())))

runs = [int(i.split("_")[-1]) for i in list(aggregatedCalls.keys())]

['MTR_393', 'MTR_394', 'MTR_491', 'MTR_492', 'MTR_493', 'MTR_494', 'MTR_495', 'MTR_496', 'MTR_497', 'MTR_498', 'MTR_499', 'MTR_500', 'MTR_501', 'MTR_502', 'MTR_503', 'MTR_504', 'MTR_505', 'MTR_506', 'MTR_507', 'MTR_508', 'MTR_509', 'MTR_510', 'MTR_511', 'MTR_512', 'MTR_513']


aggregatedCalls['MTR_507'].head()


print( set( [393, 394, *[i for i in range(491, 513)] ]) - set(sorted(runs)))

set()


aggregatedCalls_combined = pd.concat(aggregatedCalls.values(), ignore_index=False)


aggregatedCalls_combined = aggregatedCalls_combined.rename_axis(None)


aggregatedCalls_combined['run'] = aggregatedCalls_combined['run'].astype(int)


aggregatedCalls_combined = aggregatedCalls_combined.rename(columns={
    'Consensus': 'Consensus_gene_demux',
    'run': 'run_gene_demux'
})


aggregatedCalls_combined.head()


if aggregatedCalls_combined.index.duplicated().any():
    duplicate_count = aggregatedCalls_combined.index.duplicated().sum()
    print(f"Warning: {duplicate_count} duplicate indices found in aggregatedCalls_combined.")
    # processed_data_combined = processed_data_combined[~processed_data_combined.index.duplicated(keep='first')]
    # processed_data_combined = aggregatedCalls_combined[~aggregatedCalls_combined.index.duplicated(keep=False)]
    # print(f"Removed {duplicate_count} rows. New shape of processed_data_combined: {processed_data_combined.shape}")

Warning: 106982 duplicate indices found in aggregatedCalls_combined.


aggregatedCalls_combined = aggregatedCalls_combined.reset_index()
aggregatedCalls_combined['unique_index'] = aggregatedCalls_combined["index"].astype(str) + '-' + aggregatedCalls_combined['run_gene_demux'].astype(str)
aggregatedCalls_combined = aggregatedCalls_combined.set_index('unique_index')


aggregatedCalls_combined.head()


if aggregatedCalls_combined.index.duplicated().any():
    duplicate_count = aggregatedCalls_combined.index.duplicated().sum()
    print(f"Warning: {duplicate_count} duplicate indices found in aggregatedCalls_combined.")


runs_aggregatedCalls = set(aggregatedCalls_combined['run_gene_demux'].unique())
print(f"runs_aggregatedCalls_combined: {runs_aggregatedCalls}")
runs_adata = set(adata.obs['run'].unique())
print(f"runs_adata: {runs_adata}")

print(f"Runs not shared: {runs_aggregatedCalls.symmetric_difference(runs_adata)}")

runs_aggregatedCalls_combined: {512, 513, 393, 394, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511}
runs_adata: {512, 513, 393, 394, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511}
Runs not shared: set()


# runs_to_keep = runs_aggregatedCalls.intersection(runs_adata)

# Remove non-shared runs from aggregatedCalls_combined
# aggregatedCalls_combined = aggregatedCalls_combined[aggregatedCalls_combined['run_gene_demux'].isin(runs_to_keep)]

# Remove non-shared runs from adata
# adata = adata[adata.obs['run'].isin(runs_to_keep)]


fig, axes = plt.subplots(1, 2, figsize=(12, 8))

axes = axes.flatten()

columns = ['Consensus_gene_demux', 'run_gene_demux']

for ax, column in zip(axes, columns):
    aggregatedCalls_combined[column].value_counts().plot.bar(ax=ax, color='gray')
    ax.set_title(column)
    ax.set_ylabel('Count')
    ax.tick_params(axis='x', rotation=90)

plt.tight_layout()
plt.show()


adata.obs = adata.obs.join(aggregatedCalls_combined, how='left')


print(sorted(list(adata.obs.run.unique())))

[393, 394, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513]


total_rows_aggregatedCalls_combined = aggregatedCalls_combined.shape[0]
total_rows_adata = adata.obs.shape[0]

# Count rows where new columns are not NaN
combined_rows = adata.obs.dropna(subset=['Consensus_gene_demux', 'run_gene_demux']).shape[0]

print(f"Total rows in adata.obs: {total_rows_adata}")
print(f"Total rows in aggregatedCalls_combined: {total_rows_aggregatedCalls_combined}")
print(f"Rows in adata.obs with data from aggregatedCalls_combined: {combined_rows}")
print('\n')
percentage_combined = (combined_rows / total_rows_adata) * 100
print(f"Percentage of rows from adata.obs: {percentage_combined:.2f}%")
percentage_combined = (combined_rows / total_rows_aggregatedCalls_combined) * 100
print(f"Percentage of rows from aggregatedCalls_combined: {percentage_combined:.2f}%")

Total rows in adata.obs: 621573
Total rows in aggregatedCalls_combined: 937779
Rows in adata.obs with data from aggregatedCalls_combined: 621573


Percentage of rows from adata.obs: 100.00%
Percentage of rows from aggregatedCalls_combined: 66.28%


# Check for any rows in adata.obs that didn't match
unmatched_rows = adata.obs.index.difference(aggregatedCalls_combined.index)
print(f"Number of rows in adata.obs that didn't match to aggregatedCalls_combined: {len(unmatched_rows)}")

if len(unmatched_rows) > 0:
    print("First few unmatched row indexes:")
    print(unmatched_rows[:5].tolist())

Number of rows in adata.obs that didn't match to aggregatedCalls_combined: 0


# Check for any rows in aggregatedCalls_combined that didn't match
unmatched_rows = aggregatedCalls_combined.index.difference(adata.obs.index)
print(f"Number of rows in aggregatedCalls_combined that didn't match to adata.obs: {len(unmatched_rows)}")

if len(unmatched_rows) > 0:
    print("First few unmatched row indexes:")
    print(unmatched_rows[:5].tolist())

Number of rows in aggregatedCalls_combined that didn't match to adata.obs: 316206
First few unmatched row indexes:
['AAACCCAAGAACTGAT-1-501', 'AAACCCAAGAAGCGCT-1-491', 'AAACCCAAGAAGCTCG-1-505', 'AAACCCAAGAAGTGTT-1-493', 'AAACCCAAGACAAGCC-1-510']


unmatched = aggregatedCalls_combined.loc[unmatched_rows]
unmatched.head()


sorted_runs = sorted(unmatched['run_gene_demux'].unique())
unmatched['run_gene_demux'].value_counts().reindex(sorted_runs).plot.bar(color='orange')

<Axes: xlabel='run_gene_demux'>


unmatched['Consensus_gene_demux'].unique()

array(['doublet', 'CTL04E', 'CTL08A', 'LowQuality'], dtype=object)


unmatched['Consensus_gene_demux'].value_counts().plot.bar(color=['orange'])

<Axes: xlabel='Consensus_gene_demux'>


# Count the occurrences of each run_gene_demux
run_counts = unmatched['run_gene_demux'].value_counts()

# Calculate the counts of doublets for each run
doublet_counts = unmatched[unmatched['Consensus_gene_demux'] == 'doublet']['run_gene_demux'].value_counts()

# Ensure all runs are represented in doublet_counts
doublet_counts = doublet_counts.reindex(run_counts.index, fill_value=0)

# Calculate the non-doublet counts
non_doublet_counts = run_counts - doublet_counts

df = pd.DataFrame({'Non-doublet': non_doublet_counts, 'Doublet': doublet_counts})
df['Total'] = df['Non-doublet'] + df['Doublet']
df = df.sort_values('Total', ascending=False)

fig, ax = plt.subplots(figsize=(15, 8))

# Plot the stacked bar chart
bar_width = 0.8
bars = ax.bar(range(len(df)), df['Non-doublet'], color='orange', width=bar_width, label='Non-doublet')
ax.bar(range(len(df)), df['Doublet'], bottom=df['Non-doublet'], color='red', width=bar_width, label='Doublet')

ax.set_xlabel('run_gene_demux')
ax.set_ylabel('Count')
ax.set_title('run_gene_demux Counts with Doublet Proportion')

ax.set_xticks(range(len(df)))
ax.set_xticklabels(df.index, rotation=90, ha='right')

ax.legend()

plt.tight_layout()
plt.show()


adata

AnnData object with n_obs × n_vars = 621573 × 36601
    obs: 'tags', 'run', 'cmo', 'condition', 'line', 'replicate', 'condition_clean', 'index', 'Consensus_gene_demux', 'run_gene_demux'
    var: 'gene_ids', 'feature_types', 'genome'


adata.obs.head()


print(sorted(list(adata.obs.run.unique())))

[393, 394, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513]


# adata.write_h5ad(os.path.join(output_dir, 'combined_scRNA_seq_data_with_genetic_demux.h5ad'))


print(f"Number of cells before filtering: {adata.n_obs}")

Number of cells before filtering: 621573


adata.obs[adata.obs.run == 508].head()


mask = ~adata.obs['Consensus_gene_demux'].isna() & (adata.obs['Consensus_gene_demux'] != '')

adata = adata[mask, :]

print(f"Number of cells after filtering NaN: {adata.n_obs}")

Number of cells after filtering NaN: 621573


mask = adata.obs['Consensus_gene_demux'] != 'LowQuality'

adata = adata[mask, :]

print(f"Number of cells after filtering LowQuality: {adata.n_obs}")

Number of cells after filtering LowQuality: 605046


mask = adata.obs['Consensus_gene_demux'] != 'doublet'

adata = adata[mask, :]

print(f"Number of cells after filtering doublet: {adata.n_obs}")

Number of cells after filtering doublet: 494164


adata.obs.head()


print(sorted(list(adata.obs.run.unique())))

[393, 394, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513]


adata.obs.Consensus_gene_demux.unique()

array(['CTL08A', 'CTL04E', 'CTL01A', 'CTL02A'], dtype=object)


# adata.write_h5ad(os.path.join(output_dir, 'combined_scRNA_seq_data_with_genetic_demux_filtered.h5ad'))


# adata = ad.read_h5ad(os.path.join(output_dir, 'combined_scRNA_seq_data_with_genetic_demux_filtered.h5ad'))


adata[adata.obs['Consensus_gene_demux'].isin(['CTL01A', 'CTL02A'])].obs.head()


adata[adata.obs.run == 508].obs.head()


def check_consistency(df):
    # Create a mask for rows to include in the consistency check
    include_mask = ~df.run.isin([508,393,394])
    
    run_consistent = (df['run'] == df['run_gene_demux'])

    tag_consistent = (df['tags'] == df['index'])
    
    # Convert categorical columns to string for comparison
    line_str = df['line'].astype(str)
    consensus_str = df['Consensus_gene_demux'].astype(str)
    
    line_consistent = ((line_str == consensus_str) | ~include_mask)
    
    return run_consistent & line_consistent & tag_consistent


# consistency check
adata.obs['is_consistent'] = check_consistency(adata.obs)

# Count inconsistencies and calculate consistency percentage
total_count = len(adata.obs)
included_count = (~adata.obs.run.isin([508,393,394])).sum()
inconsistent_count = (~adata.obs['is_consistent']).sum()
consistency_percentage = (adata.obs['is_consistent'].sum()/ total_count) * 100

print(f"Total rows: {total_count}")
print(f"Rows included in consistency check: {included_count}")
print(f"Inconsistent rows: {inconsistent_count}")
print(f"Consistency percentage: {consistency_percentage:.2f}%")

Total rows: 494164
Rows included in consistency check: 409234
Inconsistent rows: 13664
Consistency percentage: 97.23%


none_count = adata.obs['is_consistent'].isna().sum()
print(f"Number of None values remaining: {none_count}")

Number of None values remaining: 0


adata.obs['is_consistent'] = adata.obs['is_consistent'].fillna(True)


adata.obs[~adata.obs['is_consistent']].head()


print(adata.obs.Consensus_gene_demux.unique())
print(adata.obs.line.unique())

['CTL08A' 'CTL04E' 'CTL01A' 'CTL02A']
['CTL08A', 'CTL04E', NaN]
Categories (2, object): ['CTL04E', 'CTL08A']


if inconsistent_count > 0:
    # Analyze types of inconsistencies
    run_mismatch = adata.obs[adata.obs['run'] != adata.obs['run_gene_demux']]

    tag_mismatch = adata.obs[adata.obs['tags'] != adata.obs['index']]
    
    # Convert categorical columns to string for comparison
    line_str = adata.obs['line'].astype(str)
    consensus_str = adata.obs['Consensus_gene_demux'].astype(str)
    
    # Exclude 'CTL01A' and 'CTL02A' from line mismatch check
    line_mismatch = adata.obs[
        (line_str != consensus_str) & 
        ~(adata.obs['Consensus_gene_demux'].isin(['CTL01A', 'CTL02A']) | (adata.obs.run == 508))
    ]
    
    print(f"Rows with mismatched run: {len(run_mismatch)}")
    print(f"Rows with mismatched tags: {len(tag_mismatch)}")
    print(f"Rows with mismatched line/Consensus: {len(line_mismatch)}")

Rows with mismatched run: 0
Rows with mismatched tags: 0
Rows with mismatched line/Consensus: 47480


n_cells_before = adata.n_obs

# Filter the AnnData object to keep only consistent records
adata_filtered = adata[adata.obs['is_consistent']]

n_cells_after = adata_filtered.n_obs

print(f"Number of cells before filtering: {n_cells_before}")
print(f"Number of cells after filtering: {n_cells_after}")
print(f"Number of cells removed: {n_cells_before - n_cells_after}")
print(f"Percentage of cells kept: {(n_cells_after / n_cells_before) * 100:.2f}%")

Number of cells before filtering: 494164
Number of cells after filtering: 480500
Number of cells removed: 13664
Percentage of cells kept: 97.23%


adata_filtered.obs.head()


adata_filtered.obs = adata_filtered.obs.drop(columns=['is_consistent'])
adata_filtered.obs = adata_filtered.obs.drop(columns=['index'])
adata_filtered.obs = adata_filtered.obs.drop(columns=['run'])
adata_filtered.obs = adata_filtered.obs.drop(columns=['line'])


adata_filtered.obs.head()


adata_filtered.obs = adata_filtered.obs.rename(columns={
    'Consensus_gene_demux': 'line', 
    'run_gene_demux': 'run'
    })


adata_filtered.obs['cmo'] = pd.to_numeric(adata_filtered.obs['cmo'], errors='coerce')
    
# Replace NaN with the specified value
adata_filtered.obs['cmo'] = adata_filtered.obs['cmo'].fillna(-1)

# Convert to integer
adata_filtered.obs['cmo'] = adata_filtered.obs['cmo'].astype(int)

# Print some information about the conversion
print(f"Unique values in 'cmo' after conversion: {adata_filtered.obs['cmo'].unique()}")
print(f"Number of -1 values (originally NaN): {(adata_filtered.obs['cmo'] == -1).sum()}")

Unique values in 'cmo' after conversion: [306 305 303 301 302 304 312 310 309 308 307 311  -1]
Number of -1 values (originally NaN): 84930


adata_filtered[adata_filtered.obs.cmo==-1].obs.head()


fig, axes = plt.subplots(2, 2, figsize=(12, 8))

axes = axes.flatten()

columns = ['run', 'line', 'condition', 'replicate']

for ax, column in zip(axes, columns):
    adata_filtered.obs[column].value_counts().plot.bar(ax=ax, color='gray')
    ax.set_title(column)
    ax.set_ylabel('Count')
    ax.tick_params(axis='x', rotation=90)

plt.tight_layout()
plt.show()


adata_filtered

AnnData object with n_obs × n_vars = 480500 × 36601
    obs: 'tags', 'cmo', 'condition', 'replicate', 'condition_clean', 'line', 'run'
    var: 'gene_ids', 'feature_types', 'genome'


adata_filtered.write_h5ad('./adatas/combined_scRNA_seq_data_with_genetic_demux_filtered_consistent.h5ad')

	tags	run	cmo	condition	line	replicate	condition_clean
AAACCCAAGAAACCCG-1-501	AAACCCAAGAAACCCG-1	501	303.0	THYROID_AGONIST	CTL04E	3	THYROID_AGONIST
AAACCCAAGGACGCAT-1-501	AAACCCAAGGACGCAT-1	501	306.0	THYROID_AGONIST	CTL08A	3	THYROID_AGONIST
AAACCCAAGTTGAAAC-1-501	AAACCCAAGTTGAAAC-1	501	305.0	THYROID_AGONIST	CTL08A	2	THYROID_AGONIST
AAACCCACAACGGGTA-1-501	AAACCCACAACGGGTA-1	501	306.0	THYROID_AGONIST	CTL08A	3	THYROID_AGONIST
AAACCCACAAGATCCT-1-501	AAACCCACAAGATCCT-1	501	303.0	THYROID_AGONIST	CTL04E	3	THYROID_AGONIST

	tags	cmo	condition	replicate	condition_clean	Consensus_gene_demux	run_gene_demux
AAACCCAAGGACGCAT-1-501	AAACCCAAGGACGCAT-1	306.0	THYROID_AGONIST	3	THYROID_AGONIST	CTL08A	501
AAACCCAAGTTGAAAC-1-501	AAACCCAAGTTGAAAC-1	305.0	THYROID_AGONIST	2	THYROID_AGONIST	CTL08A	501
AAACCCACAACGGGTA-1-501	AAACCCACAACGGGTA-1	306.0	THYROID_AGONIST	3	THYROID_AGONIST	CTL08A	501
AAACCCACAAGATCCT-1-501	AAACCCACAAGATCCT-1	303.0	THYROID_AGONIST	3	THYROID_AGONIST	CTL04E	501
AAACCCACAATCAAGA-1-501	AAACCCACAATCAAGA-1	301.0	THYROID_AGONIST	1	THYROID_AGONIST	CTL04E	501

1. Environment¶

2. Load adata¶

3. Load aggregatedCall data¶

4. Combine data¶

5. Check data consistency¶

	cmo	run	condition	line	replicate
AAACCCAAGATGGTAT-1	309.0	513	MIX	CTL04E	3
AAACCCAAGCCTGAAG-1	311.0	513	MIX	CTL08A	2
AAACCCAAGCTTTCCC-1	312.0	513	MIX	CTL08A	3
AAACCCAAGGGATCTG-1	307.0	513	MIX	CTL04E	1
AAACCCAAGGGTGAGG-1	309.0	513	MIX	CTL04E	3

	Consensus	run
barcode
AAACCCAAGACCAACG-1	CTL04E	507
AAACCCAAGAGGTCGT-1	CTL08A	507
AAACCCAAGCCATTCA-1	CTL08A	507
AAACCCAAGCGCAATG-1	CTL08A	507
AAACCCAAGGAGGCAG-1	CTL08A	507

	Consensus_gene_demux	run_gene_demux
AAACCCAAGACTTCGT-1	CTL08A	503
AAACCCAAGAGTCAGC-1	CTL04E	503
AAACCCAAGCCTCAGC-1	CTL08A	503
AAACCCAAGCTACTGT-1	CTL08A	503
AAACCCAAGCTTGTGT-1	CTL04E	503

	index	Consensus_gene_demux	run_gene_demux
unique_index
AAACCCAAGACTTCGT-1-503	AAACCCAAGACTTCGT-1	CTL08A	503
AAACCCAAGAGTCAGC-1-503	AAACCCAAGAGTCAGC-1	CTL04E	503
AAACCCAAGCCTCAGC-1-503	AAACCCAAGCCTCAGC-1	CTL08A	503
AAACCCAAGCTACTGT-1-503	AAACCCAAGCTACTGT-1	CTL08A	503
AAACCCAAGCTTGTGT-1-503	AAACCCAAGCTTGTGT-1	CTL04E	503

	index	Consensus_gene_demux	run_gene_demux
AAACCCAAGAACTGAT-1-501	AAACCCAAGAACTGAT-1	doublet	501
AAACCCAAGAAGCGCT-1-491	AAACCCAAGAAGCGCT-1	doublet	491
AAACCCAAGAAGCTCG-1-505	AAACCCAAGAAGCTCG-1	CTL04E	505
AAACCCAAGAAGTGTT-1-493	AAACCCAAGAAGTGTT-1	CTL08A	493
AAACCCAAGACAAGCC-1-510	AAACCCAAGACAAGCC-1	CTL04E	510

	tags	run	cmo	condition	line	replicate	condition_clean	index	Consensus_gene_demux	run_gene_demux
AAACCCAAGAGGTCGT-1-508	AAACCCAAGAGGTCGT-1	508	NaN	RET_AGONIST	NaN	NaN	RET_AGONIST	AAACCCAAGAGGTCGT-1	CTL08A	508
AAACCCAAGATTGGGC-1-508	AAACCCAAGATTGGGC-1	508	NaN	RET_AGONIST	NaN	NaN	RET_AGONIST	AAACCCAAGATTGGGC-1	doublet	508
AAACCCAAGCAATTAG-1-508	AAACCCAAGCAATTAG-1	508	NaN	RET_AGONIST	NaN	NaN	RET_AGONIST	AAACCCAAGCAATTAG-1	doublet	508
AAACCCAAGCTGCCAC-1-508	AAACCCAAGCTGCCAC-1	508	NaN	RET_AGONIST	NaN	NaN	RET_AGONIST	AAACCCAAGCTGCCAC-1	doublet	508
AAACCCAAGGATTCCT-1-508	AAACCCAAGGATTCCT-1	508	NaN	RET_AGONIST	NaN	NaN	RET_AGONIST	AAACCCAAGGATTCCT-1	CTL08A	508

	tags	run	cmo	condition	line	replicate	condition_clean	index	Consensus_gene_demux	run_gene_demux	is_consistent
AAACCCAAGCACTCAT-1-393	AAACCCAAGCACTCAT-1	393	NaN	MTR_393_3GEX	NaN	NaN	NaN	AAACCCAAGCACTCAT-1	CTL01A	393	True
AAACCCAAGCCTGCCA-1-393	AAACCCAAGCCTGCCA-1	393	NaN	MTR_393_3GEX	NaN	NaN	NaN	AAACCCAAGCCTGCCA-1	CTL01A	393	True
AAACCCAAGGGATGTC-1-393	AAACCCAAGGGATGTC-1	393	NaN	MTR_393_3GEX	NaN	NaN	NaN	AAACCCAAGGGATGTC-1	CTL01A	393	True
AAACCCACAATCGCAT-1-393	AAACCCACAATCGCAT-1	393	NaN	MTR_393_3GEX	NaN	NaN	NaN	AAACCCACAATCGCAT-1	CTL02A	393	True
AAACCCACAGCTGAAG-1-393	AAACCCACAGCTGAAG-1	393	NaN	MTR_393_3GEX	NaN	NaN	NaN	AAACCCACAGCTGAAG-1	CTL01A	393	True

	tags	run	cmo	condition	line	replicate	condition_clean	index	Consensus_gene_demux	run_gene_demux	is_consistent
AAACGAAAGGGAGGGT-1-501	AAACGAAAGGGAGGGT-1	501	302.0	THYROID_AGONIST	CTL04E	2	THYROID_AGONIST	AAACGAAAGGGAGGGT-1	CTL08A	501	False
AAACGAACAGCGAGTA-1-501	AAACGAACAGCGAGTA-1	501	303.0	THYROID_AGONIST	CTL04E	3	THYROID_AGONIST	AAACGAACAGCGAGTA-1	CTL08A	501	False
AAACGCTTCCCAGTGG-1-501	AAACGCTTCCCAGTGG-1	501	305.0	THYROID_AGONIST	CTL08A	2	THYROID_AGONIST	AAACGCTTCCCAGTGG-1	CTL04E	501	False
AAAGGGCGTACGGGAT-1-501	AAAGGGCGTACGGGAT-1	501	305.0	THYROID_AGONIST	CTL08A	2	THYROID_AGONIST	AAAGGGCGTACGGGAT-1	CTL04E	501	False
AAAGTCCCAAAGACTA-1-501	AAAGTCCCAAAGACTA-1	501	306.0	THYROID_AGONIST	CTL08A	3	THYROID_AGONIST	AAAGTCCCAAAGACTA-1	CTL04E	501	False