Citation

BibTeX format

% Journal article record. The citation key is kept as exported so existing citations still resolve.
% NOTE(review): no article number / page range is present in this record -- add `pages` if known.
@article{Beaney:2024:10.1038/s43856-024-00529-4,
  author  = {Beaney, T and Clarke, J and Salman, D and Woodcock, T and Majeed, F and Aylin, P and Barahona, M},
  title   = {Identifying multi-resolution clusters of diseases in ten million patients with multimorbidity in primary care in {England}},
  journal = {Communications Medicine},
  volume  = {4},
  year    = {2024},
  issn    = {2730-664X},
  doi     = {10.1038/s43856-024-00529-4},
  url     = {https://doi.org/10.1038/s43856-024-00529-4},
}

RIS format (EndNote, RefMan)

TY  - JOUR
AB  - Background: Identifying clusters of diseases may aid understanding of shared aetiology, management of co-morbidities, and the discovery of new disease associations. Our study aims to identify disease clusters using a large set of long-term conditions and comparing methods that use the co-occurrence of diseases versus methods that use the sequence of disease development in a person over time. Methods: We use electronic health records from over ten million people with multimorbidity registered to primary care in England. First, we extract data-driven representations of 212 diseases from patient records employing (i) co-occurrence-based methods and (ii) sequence-based natural language processing methods. Second, we apply the graph-based Markov Multiscale Community Detection (MMCD) to identify clusters based on disease similarity at multiple resolutions. We evaluate the representations and clusters using a clinically curated set of 253 known disease association pairs, and qualitatively assess the interpretability of the clusters. Results: Both co-occurrence and sequence-based algorithms generate interpretable disease representations, with the best performance from the skip-gram algorithm. MMCD outperforms k-means and hierarchical clustering in explaining known disease associations. We find that diseases display an almost-hierarchical structure across resolutions from closely to more loosely similar co-occurrence patterns and identify interpretable clusters corresponding to both established and novel patterns. Conclusions: Our method provides a tool for clustering diseases at different levels of resolution from co-occurrence patterns in high-dimensional electronic health records, which could be used to facilitate discovery of associations between diseases in the future.
AU  - Beaney,T
AU  - Clarke,J
AU  - Salman,D
AU  - Woodcock,T
AU  - Majeed,F
AU  - Aylin,P
AU  - Barahona,M
DO  - 10.1038/s43856-024-00529-4
PY  - 2024///
SN  - 2730-664X
TI  - Identifying multi-resolution clusters of diseases in ten million patients with multimorbidity in primary care in England
T2  - Communications Medicine
UR  - http://dx.doi.org/10.1038/s43856-024-00529-4
UR  - https://www.nature.com/articles/s43856-024-00529-4
UR  - http://hdl.handle.net/10044/1/111944
VL  - 4
ER  - 