Citation

BibTeX format

% arXiv preprint. No proceedings title is recorded for this work, so it is typed
% as a misc entry with eprint fields rather than an inproceedings entry that
% would lack its required booktitle. Initials are separated so each one is kept.
@misc{Altuncu:2018,
  author        = {Altuncu, M. T. and Mayer, E. and Yaliraki, S. N. and Barahona, M.},
  title         = {From Text to Topics in Healthcare Records: An Unsupervised Graph Partitioning Methodology},
  year          = {2018},
  eprint        = {1807.02599},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1807.02599v1},
}

RIS format (EndNote, RefMan)

TY  - CPAPER
AB  - Electronic Healthcare Records contain large volumes of unstructured data, including extensive free text. Yet this source of detailed information often remains under-used because of a lack of methodologies to extract interpretable content in a timely manner. Here we apply network-theoretical tools to analyse free text in Hospital Patient Incident reports from the National Health Service, to find clusters of documents with similar content in an unsupervised manner at different levels of resolution. We combine deep neural network paragraph vector text-embedding with multiscale Markov Stability community detection applied to a sparsified similarity graph of document vectors, and showcase the approach on incident reports from Imperial College Healthcare NHS Trust, London. The multiscale community structure reveals different levels of meaning in the topics of the dataset, as shown by descriptive terms extracted from the clusters of records. We also compare a posteriori against hand-coded categories assigned by healthcare personnel, and show that our approach outperforms LDA-based models. Our content clusters exhibit good correspondence with two levels of hand-coded categories, yet they also provide further medical detail in certain areas and reveal complementary descriptors of incidents beyond the external classification taxonomy.
AU  - Altuncu,MT
AU  - Mayer,E
AU  - Yaliraki,SN
AU  - Barahona,M
PY  - 2018///
TI  - From Text to Topics in Healthcare Records: An Unsupervised Graph Partitioning Methodology
UR  - http://arxiv.org/abs/1807.02599v1
ER  - 