Colon Cancer
In [2]:
Copied!
import pandas as pd
import numpy as np
import os
import glob
import multiprocessing as mp
from multiprocessing import Pool
from pathlib import Path
import pandas as pd
import numpy as np
import os
import glob
import multiprocessing as mp
from multiprocessing import Pool
from pathlib import Path
In [3]:
Copied!
import scMethtools as scm
import scMethtools as scm
#### #### # # ##### # # ##### #### #### # #### # # # ## ## # # # # # # # # # # #### # # ## # # ###### # # # # # # #### # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #### #### # # # # # # #### #### ###### #### Version: 1.0.0, Tutorials: https://ngdc.cncb.ac.cn/methbank/scm
In [4]:
Copied!
import anndata as ad
import anndata as ad
In [3]:
Copied!
vmr_mean_df = pd.read_csv("./out_GEO/VMR_matrix/methylation_fractions.csv.gz")
vmr_mean_df = pd.read_csv("./out_GEO/VMR_matrix/methylation_fractions.csv.gz")
In [4]:
Copied!
vmr_mean_df
vmr_mean_df
Out[4]:
Unnamed: 0 | chr1:10470-12680 | chr1:64770-66890 | chr1:67080-69220 | chr1:114230-116260 | chr1:120180-123670 | chr1:139910-142060 | chr1:147880-158160 | chr1:171160-175120 | chr1:225470-227490 | ... | chrY:59214473-59216933 | chrY:59217023-59219033 | chrY:59233833-59237673 | chrY:59240733-59243003 | chrY:59249353-59251853 | chrY:59264243-59266613 | chrY:59267093-59271203 | chrY:59278903-59282823 | chrY:59330763-59332793 | chrY:59340013-59342083 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | CG_GSM2697488_scTrioSeq2Met_CRC01_LN1_166.singleC | NaN | 1.0 | NaN | NaN | NaN | 0.250000 | 1.00 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | CG_GSM2697489_scTrioSeq2Met_CRC01_LN1_167.singleC | NaN | NaN | NaN | 1.0 | NaN | 0.500000 | NaN | 1.000000 | NaN | ... | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | CG_GSM2697490_scTrioSeq2Met_CRC01_LN1_168.singleC | NaN | 0.0 | NaN | NaN | NaN | NaN | 0.00 | 0.571429 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | CG_GSM2697491_scTrioSeq2Met_CRC01_LN1_169.singleC | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | CG_GSM2697492_scTrioSeq2Met_CRC01_LN1_170.singleC | NaN | 1.0 | 0.0 | NaN | NaN | 0.333333 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1290 | CG_GSM3241378_scTrioSeq2Met_CRC15_PT4_465.singleC | 1.000000 | NaN | 0.0 | NaN | NaN | NaN | NaN | 0.875000 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1291 | CG_GSM3241379_scTrioSeq2Met_CRC15_PT4_480.singleC | NaN | NaN | NaN | NaN | NaN | NaN | 1.00 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1292 | CG_GSM3241380_scTrioSeq2Met_CRC15_PT5_481.singleC | 1.000000 | NaN | NaN | 0.0 | NaN | NaN | 0.00 | NaN | NaN | ... | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN |
1293 | CG_GSM3241381_scTrioSeq2Met_CRC15_PT5_482.singleC | 0.833333 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
1294 | CG_GSM3241382_scTrioSeq2Met_CRC15_PT5_496.singleC | NaN | 0.0 | NaN | 0.0 | NaN | NaN | 0.25 | 0.000000 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1295 rows × 107943 columns
In [7]:
Copied!
row_names = vmr_mean_df.iloc[:,0].tolist() #
col_names = vmr_mean_df.columns.tolist()[1:] #
data_df = vmr_mean_df.iloc[:, 1:]
vadata = ad.AnnData(X=data_df, obs=pd.DataFrame(index=row_names), var=pd.DataFrame(index=col_names))
row_names = vmr_mean_df.iloc[:,0].tolist() #
col_names = vmr_mean_df.columns.tolist()[1:] #
data_df = vmr_mean_df.iloc[:, 1:]
vadata = ad.AnnData(X=data_df, obs=pd.DataFrame(index=row_names), var=pd.DataFrame(index=col_names))
In [8]:
Copied!
vadata
vadata
Out[8]:
AnnData object with n_obs × n_vars = 1295 × 107942
In [12]:
Copied!
# realtive matrix
adata = ad.read_h5ad('./result/vadata.h5')
# realtive matrix
adata = ad.read_h5ad('./result/vadata.h5')
In [13]:
Copied!
adata
adata
Out[13]:
AnnData object with n_obs × n_vars = 1295 × 107942
In [14]:
Copied!
vadata.layers['relative'] = adata.X
vadata.layers['relative'] = adata.X
In [15]:
Copied!
vadata
vadata
Out[15]:
AnnData object with n_obs × n_vars = 1295 × 107942 layers: 'relative'
In [66]:
Copied!
import scipy.sparse as sparse
def qc_metrics(adata):
n_samples = adata.X.shape[0]
n_features = adata.X.shape[1]
if sparse.issparse(adata.X):
#coverage_features = [n_features - np.isnan(line.toarray()).sum() for line in X]
coverage_features = np.sum(~np.isnan(adata.X.toarray()), axis=1)
methylation_features = np.nanmean(adata.X.toarray(), axis=1)
feature_mean = np.nanmean(adata.X.toarray(), axis=0)
coverage_cells = np.sum(~np.isnan(adata.X.toarray()), axis=0).astype(int)
# 计算平方均值 (x * x).mean()
mean_sq = (adata.X.power(2).mean(axis=0)).A1
# 计算均值
mean = adata.X.mean(axis=0).A1
else:
coverage_features = np.sum(~np.isnan(adata.X), axis=1)
methylation_features = np.nanmean(adata.X, axis=1)
feature_mean = np.nanmean(adata.X, axis=0)
coverage_cells = np.sum(~np.isnan(adata.X), axis=0).astype(int)
mean_sq = np.nanmean(np.power(vadata.X, 2), axis=0)
mean = feature_mean
#obs
adata.obs['mean'] = methylation_features
adata.obs['n_features'] = coverage_features
# 将结果存储到 adata.var
adata.var['n_cells'] = coverage_cells
adata.var['mean'] = feature_mean
# 计算样本方差 (mean_sq - mean**2) * (n_samples / (n_samples - 1))
var = (mean_sq - mean**2) * (n_samples / (n_samples - 1))
# 将均值中小于阈值的值替换为 1e-12
threshold = 1e-12
mean[mean < threshold] = threshold
# 计算 dispersion
dispersion = var / mean
# 将结果存储到 adata.var
adata.var['dispersion'] = dispersion
adata.var['dispersion_log'] = np.log(dispersion)
# Calculate mean and coverage
mean_mean = adata.var['mean'].mean()
mean_coverage = adata.var['n_cells'].mean()
adata.var['NormalizedDispersion'] = dispersion / (mean_mean * mean_coverage)
import scipy.sparse as sparse
def qc_metrics(adata):
n_samples = adata.X.shape[0]
n_features = adata.X.shape[1]
if sparse.issparse(adata.X):
#coverage_features = [n_features - np.isnan(line.toarray()).sum() for line in X]
coverage_features = np.sum(~np.isnan(adata.X.toarray()), axis=1)
methylation_features = np.nanmean(adata.X.toarray(), axis=1)
feature_mean = np.nanmean(adata.X.toarray(), axis=0)
coverage_cells = np.sum(~np.isnan(adata.X.toarray()), axis=0).astype(int)
# 计算平方均值 (x * x).mean()
mean_sq = (adata.X.power(2).mean(axis=0)).A1
# 计算均值
mean = adata.X.mean(axis=0).A1
else:
coverage_features = np.sum(~np.isnan(adata.X), axis=1)
methylation_features = np.nanmean(adata.X, axis=1)
feature_mean = np.nanmean(adata.X, axis=0)
coverage_cells = np.sum(~np.isnan(adata.X), axis=0).astype(int)
mean_sq = np.nanmean(np.power(vadata.X, 2), axis=0)
mean = feature_mean
#obs
adata.obs['mean'] = methylation_features
adata.obs['n_features'] = coverage_features
# 将结果存储到 adata.var
adata.var['n_cells'] = coverage_cells
adata.var['mean'] = feature_mean
# 计算样本方差 (mean_sq - mean**2) * (n_samples / (n_samples - 1))
var = (mean_sq - mean**2) * (n_samples / (n_samples - 1))
# 将均值中小于阈值的值替换为 1e-12
threshold = 1e-12
mean[mean < threshold] = threshold
# 计算 dispersion
dispersion = var / mean
# 将结果存储到 adata.var
adata.var['dispersion'] = dispersion
adata.var['dispersion_log'] = np.log(dispersion)
# Calculate mean and coverage
mean_mean = adata.var['mean'].mean()
mean_coverage = adata.var['n_cells'].mean()
adata.var['NormalizedDispersion'] = dispersion / (mean_mean * mean_coverage)
In [67]:
Copied!
qc_metrics(vadata)
qc_metrics(vadata)
Meta Data¶
In [36]:
Copied!
obs = pd.read_csv("./result/methylation_obs.csv", index_col=0)
obs.head()
obs = pd.read_csv("./result/methylation_obs.csv", index_col=0)
obs.head()
Out[36]:
coverage_feature | mean_cell_methylation | Accession | Title | Sample Type | Taxonomy | Channels | Platform | Series | SRA Accession | cell | patient | sample_region | other_info | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CRC04_LN1_349 | 61150 | -0.073804 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | CRC04_LN1_349 | CRC04 | LN1 | 349 |
CRC04_LN1_350 | 52880 | -0.088555 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | CRC04_LN1_350 | CRC04 | LN1 | 350 |
CRC04_LN1_351 | 52989 | -0.098562 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | CRC04_LN1_351 | CRC04 | LN1 | 351 |
CRC04_LN1_352 | 50023 | -0.055263 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | CRC04_LN1_352 | CRC04 | LN1 | 352 |
CRC04_LN1_353 | 50305 | -0.065335 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | CRC04_LN1_353 | CRC04 | LN1 | 353 |
In [37]:
Copied!
# 提取指定的列和索引
subset_obs = obs[['cell', 'patient', 'sample_region']]
# 将索引列添加为一个普通列
subset_obs['index'] = subset_obs.index
subset_obs.head()
# 提取指定的列和索引
subset_obs = obs[['cell', 'patient', 'sample_region']]
# 将索引列添加为一个普通列
subset_obs['index'] = subset_obs.index
subset_obs.head()
/tmp/ipykernel_111744/1564921406.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy subset_obs['index'] = subset_obs.index
Out[37]:
cell | patient | sample_region | index | |
---|---|---|---|---|
CRC04_LN1_349 | CRC04_LN1_349 | CRC04 | LN1 | CRC04_LN1_349 |
CRC04_LN1_350 | CRC04_LN1_350 | CRC04 | LN1 | CRC04_LN1_350 |
CRC04_LN1_351 | CRC04_LN1_351 | CRC04 | LN1 | CRC04_LN1_351 |
CRC04_LN1_352 | CRC04_LN1_352 | CRC04 | LN1 | CRC04_LN1_352 |
CRC04_LN1_353 | CRC04_LN1_353 | CRC04 | LN1 | CRC04_LN1_353 |
In [39]:
Copied!
vadata.obs['Title'] = vadata.obs.index
vadata.obs['Title'] = vadata.obs.index
In [42]:
Copied!
vadata.obs.index = vadata.obs['Title'].str.extract(r'_([^_]+_[^_]+_[^_]+)\.singleC')[0].tolist()
vadata.obs.index = vadata.obs['Title'].str.extract(r'_([^_]+_[^_]+_[^_]+)\.singleC')[0].tolist()
In [48]:
Copied!
vadata.obs['Accession'] = vadata.obs['Title'].str.extract(r'(GSM\d+)')
vadata.obs['Accession'] = vadata.obs['Title'].str.extract(r'(GSM\d+)')
In [52]:
Copied!
vadata.obs['patient'] = vadata.obs.index.str.split('_').str[0].tolist()
vadata.obs['patient'] = vadata.obs.index.str.split('_').str[0].tolist()
In [53]:
Copied!
vadata.obs['sample_region'] = vadata.obs.index.str.split('_').str[1].tolist()
vadata.obs['sample_region'] = vadata.obs.index.str.split('_').str[1].tolist()
In [54]:
Copied!
vadata.obs['condition'] = vadata.obs['sample_region'].str.replace(r'\d+', '', regex=True)
vadata.obs['condition'] = vadata.obs['sample_region'].str.replace(r'\d+', '', regex=True)
In [55]:
Copied!
vadata.obs
vadata.obs
Out[55]:
mean | n_features | Title | Accession | patient | sample_region | condition | |
---|---|---|---|---|---|---|---|
CRC01_LN1_166 | 0.652891 | 73519 | CG_GSM2697488_scTrioSeq2Met_CRC01_LN1_166.singleC | GSM2697488 | CRC01 | LN1 | LN |
CRC01_LN1_167 | 0.648543 | 72193 | CG_GSM2697489_scTrioSeq2Met_CRC01_LN1_167.singleC | GSM2697489 | CRC01 | LN1 | LN |
CRC01_LN1_168 | 0.268596 | 61218 | CG_GSM2697490_scTrioSeq2Met_CRC01_LN1_168.singleC | GSM2697490 | CRC01 | LN1 | LN |
CRC01_LN1_169 | 0.651993 | 55825 | CG_GSM2697491_scTrioSeq2Met_CRC01_LN1_169.singleC | GSM2697491 | CRC01 | LN1 | LN |
CRC01_LN1_170 | 0.653039 | 63547 | CG_GSM2697492_scTrioSeq2Met_CRC01_LN1_170.singleC | GSM2697492 | CRC01 | LN1 | LN |
... | ... | ... | ... | ... | ... | ... | ... |
CRC15_PT4_465 | 0.557914 | 61136 | CG_GSM3241378_scTrioSeq2Met_CRC15_PT4_465.singleC | GSM3241378 | CRC15 | PT4 | PT |
CRC15_PT4_480 | 0.555016 | 42216 | CG_GSM3241379_scTrioSeq2Met_CRC15_PT4_480.singleC | GSM3241379 | CRC15 | PT4 | PT |
CRC15_PT5_481 | 0.664971 | 72195 | CG_GSM3241380_scTrioSeq2Met_CRC15_PT5_481.singleC | GSM3241380 | CRC15 | PT5 | PT |
CRC15_PT5_482 | 0.556494 | 61454 | CG_GSM3241381_scTrioSeq2Met_CRC15_PT5_482.singleC | GSM3241381 | CRC15 | PT5 | PT |
CRC15_PT5_496 | 0.577864 | 80556 | CG_GSM3241382_scTrioSeq2Met_CRC15_PT5_496.singleC | GSM3241382 | CRC15 | PT5 | PT |
1295 rows × 7 columns
In [72]:
Copied!
vadata.var
vadata.var
Out[72]:
n_cells | mean | dispersion | dispersion_log | NormalizedDispersion | sr_var | |
---|---|---|---|---|---|---|
chr1:10470-12680 | 732 | 0.737960 | 0.189790 | -1.661838 | 0.000514 | 0.085005 |
chr1:64770-66890 | 163 | 0.512270 | 0.479118 | -0.735808 | 0.001299 | 0.066639 |
chr1:67080-69220 | 196 | 0.370238 | 0.491135 | -0.711036 | 0.001331 | 0.067556 |
chr1:114230-116260 | 371 | 0.560647 | 0.364143 | -1.010209 | 0.000987 | 0.066250 |
chr1:120180-123670 | 254 | 0.526461 | 0.351589 | -1.045294 | 0.000953 | 0.068894 |
... | ... | ... | ... | ... | ... | ... |
chrY:59264243-59266613 | 36 | 0.546296 | 0.391859 | -0.936852 | 0.001062 | 0.066584 |
chrY:59267093-59271203 | 55 | 0.592987 | 0.316340 | -1.150937 | 0.000858 | 0.071116 |
chrY:59278903-59282823 | 3 | 0.333333 | 0.667182 | -0.404693 | 0.001809 | 0.075617 |
chrY:59330763-59332793 | 2 | 0.500000 | 0.500386 | -0.692375 | 0.001356 | 0.082373 |
chrY:59340013-59342083 | 7 | 0.642857 | 0.301820 | -1.197923 | 0.000818 | 0.100590 |
107942 rows × 6 columns
In [71]:
Copied!
X = vadata.layers['relative']
vadata.var['sr_var'] = np.nanvar(X,axis=0)
X = vadata.layers['relative']
vadata.var['sr_var'] = np.nanvar(X,axis=0)
Save¶
In [73]:
Copied!
vadata.write('./result/data/CG_mean_re_methylation.h5ad')
vadata.write('./result/data/CG_mean_re_methylation.h5ad')
In [5]:
Copied!
vadata = ad.read('./result/data/CG_mean_re_methylation.h5ad')
vadata = ad.read('./result/data/CG_mean_re_methylation.h5ad')
/asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/anndata/__init__.py:51: FutureWarning: `anndata.read` is deprecated, use `anndata.read_h5ad` instead. `ad.read` will be removed in mid 2024. warnings.warn(
In [6]:
Copied!
vadata
vadata
Out[6]:
AnnData object with n_obs × n_vars = 1278 × 75656 obs: 'mean', 'n_features', 'Title', 'Accession', 'patient', 'sample_region', 'condition', 'pct_peaks', 'leiden', 'cell_name', 'n_obs', 'n_meth', 'global_meth_frac' var: 'n_cells', 'mean', 'dispersion', 'dispersion_log', 'NormalizedDispersion', 'sr_var', 'covered_cell', 'pct_cell', 'feature_select_sr', 'feature_select_NormDispersion' uns: 'condition_colors', 'leiden', 'leiden_colors', 'neighbors', 'patient_colors', 'relative_pca_var_ratios', 'sample_region_colors', 'tsne', 'umap' obsm: 'X_tsne', 'X_umap', 'relative_pca' layers: 'relative' obsp: 'connectivities', 'distances'
In [20]:
Copied!
scm.pl.umap(vadata,color=['patient','condition','leiden'])
scm.pl.umap(vadata,color=['patient','condition','leiden'])
In [14]:
Copied!
df = scm.dm.mdiff_pairwise(vadata,"condition", groups = ['NC'],reference ='rest' , method='wilcoxon',key_added = "wilcoxon_x", top_n =500 ,matrix=True)
df = scm.dm.mdiff_pairwise(vadata,"condition", groups = ['NC'],reference ='rest' , method='wilcoxon',key_added = "wilcoxon_x", top_n =500 ,matrix=True)
/asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/numpy/core/fromnumeric.py:86: FutureWarning: The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis) return reduction(axis=axis, out=out, **passkwargs)
Importing differential methylated region dataframe for ['NC'] v.s. rest
In [15]:
Copied!
df
df
Out[15]:
names | scores | logfoldchanges | pvals | pvals_adj | |
---|---|---|---|---|---|
0 | chr2:10152176-10157286 | 15.282095 | NaN | 1.006528e-52 | 4.230551e-49 |
1 | chr21:9877003-9881613 | 15.205648 | NaN | 3.244029e-52 | 1.128088e-48 |
2 | chr12:34488449-34498879 | 14.992649 | NaN | 8.201527e-51 | 1.632881e-47 |
3 | chr22:46472478-46481018 | 14.963617 | NaN | 1.269347e-50 | 2.342286e-47 |
4 | chr2:233924316-233928856 | 14.646161 | NaN | 1.425448e-48 | 1.497829e-45 |
... | ... | ... | ... | ... | ... |
495 | chrX:28519473-28527553 | 9.644186 | NaN | 5.202219e-22 | 2.264161e-21 |
496 | chr21:44887463-44889693 | 9.636016 | NaN | 5.633176e-22 | 2.445115e-21 |
497 | chr18:75974501-75981031 | 9.629598 | NaN | 5.996408e-22 | 2.596521e-21 |
498 | chr3:123859157-123866177 | 9.629160 | NaN | 6.022001e-22 | 2.606559e-21 |
499 | chr8:66894938-66904878 | 9.624783 | NaN | 6.283952e-22 | 2.714352e-21 |
500 rows × 5 columns
In [16]:
Copied!
df.to_csv('./NC_rest_dmr.csv',index=False)
df.to_csv('./NC_rest_dmr.csv',index=False)
In [21]:
Copied!
leiden_diff = scm.dm.mdiff(vadata,"condition", method='wilcoxon',key_added = "leiden_diff", top_n =100 ,matrix=True)
leiden_diff = scm.dm.mdiff(vadata,"condition", method='wilcoxon',key_added = "leiden_diff", top_n =100 ,matrix=True)
/asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/numpy/core/fromnumeric.py:86: FutureWarning: The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis) return reduction(axis=axis, out=out, **passkwargs)
In [22]:
Copied!
leiden_diff
leiden_diff
Out[22]:
group | names | scores | logfoldchanges | pvals | pvals_adj | |
---|---|---|---|---|---|---|
0 | LN | chr3:184287217-184303897 | 10.910848 | NaN | 1.022980e-27 | 1.116692e-27 |
1 | LN | chr13:100638821-100646191 | 10.484183 | NaN | 1.021238e-25 | 1.106457e-25 |
2 | LN | chr3:128718447-128722697 | 10.309122 | NaN | 6.408390e-25 | 6.922826e-25 |
3 | LN | chr9:139415458-139419478 | 9.080785 | NaN | 1.077946e-19 | 1.146600e-19 |
4 | LN | chr14:103591561-103595301 | 9.061971 | NaN | 1.281150e-19 | 1.362421e-19 |
... | ... | ... | ... | ... | ... | ... |
595 | PT | chr6:73329962-73335072 | 5.115004 | NaN | 3.137348e-07 | 3.220351e-07 |
596 | PT | chr3:62354387-62361457 | 5.091096 | NaN | 3.560002e-07 | 3.653890e-07 |
597 | PT | chr13:110957401-110961931 | 5.087136 | NaN | 3.635103e-07 | 3.730768e-07 |
598 | PT | chr7:2564261-2567041 | 5.079142 | NaN | 3.791437e-07 | 3.890953e-07 |
599 | PT | chr20:33144900-33147000 | 5.049447 | NaN | 4.430904e-07 | 4.546587e-07 |
600 rows × 6 columns
In [23]:
Copied!
leiden_diff.to_csv('./condition_dmr.csv',index=False)
leiden_diff.to_csv('./condition_dmr.csv',index=False)
In [24]:
Copied!
leiden_diff = scm.dm.mdiff(vadata,"leiden", method='t-test',key_added = "leiden_diff", top_n =100 ,matrix=True)
leiden_diff = scm.dm.mdiff(vadata,"leiden", method='t-test',key_added = "leiden_diff", top_n =100 ,matrix=True)
/asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2( /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'names'] = self.var_names[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'scores'] = scores[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals'] = pvals[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices] /asnas/baoym_group/zongwt/software/miniconda3/envs/py39/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` self.stats[group_name, 'logfoldchanges'] = np.log2(
In [25]:
Copied!
leiden_diff
leiden_diff
Out[25]:
group | names | scores | logfoldchanges | pvals | pvals_adj | |
---|---|---|---|---|---|---|
0 | 0 | chr16:46399746-46404946 | 7.249851 | 0.519170 | 5.422704e-11 | 0.000002 |
1 | 0 | chr16:46383966-46393026 | 7.232152 | 0.522983 | 5.866324e-11 | 0.000002 |
2 | 0 | chr16:46405876-46410846 | 6.657272 | 0.504263 | 9.754390e-10 | 0.000025 |
3 | 0 | chr18:25577811-25580001 | 0.000000 | NaN | 1.000000e+00 | 1.000000 |
4 | 0 | chr18:27003211-27007831 | 0.000000 | NaN | 1.000000e+00 | 1.000000 |
... | ... | ... | ... | ... | ... | ... |
2395 | 23 | chr18:26099011-26104601 | 0.000000 | NaN | 1.000000e+00 | 1.000000 |
2396 | 23 | chr18:26141541-26146431 | 0.000000 | NaN | 1.000000e+00 | 1.000000 |
2397 | 23 | chr18:26155281-26158341 | 0.000000 | NaN | 1.000000e+00 | 1.000000 |
2398 | 23 | chr18:26185441-26190621 | 0.000000 | NaN | 1.000000e+00 | 1.000000 |
2399 | 23 | chr18:23432261-23434471 | 0.000000 | NaN | 1.000000e+00 | 1.000000 |
2400 rows × 6 columns
In [28]:
Copied!
scm.pl.umap(vadata,color=['leiden','chr16:46399746-46404946'])
scm.pl.umap(vadata,color=['leiden','chr16:46399746-46404946'])
In [26]:
Copied!
leiden_diff.to_csv('./leiden_clustet_dmr.csv',index=False)
leiden_diff.to_csv('./leiden_clustet_dmr.csv',index=False)
In [27]:
Copied!
leiden_diff['names'].unique
leiden_diff['names'].unique
Out[27]:
<bound method Series.unique of 0 chr16:46399746-46404946 1 chr16:46383966-46393026 2 chr16:46405876-46410846 3 chr18:25577811-25580001 4 chr18:27003211-27007831 ... 2395 chr18:26099011-26104601 2396 chr18:26141541-26146431 2397 chr18:26155281-26158341 2398 chr18:26185441-26190621 2399 chr18:23432261-23434471 Name: names, Length: 2400, dtype: object>
In [ ]:
Copied!