% Conference paper in the MONAMI 2017 proceedings (LNICST series).
% Title words that must keep their case are brace-protected; all names use
% the unambiguous "Last, First" form.
% NOTE(review): address was the country "Germany" in the export; it is set to
% the publisher city inferred from the 978-3-319 ISBN prefix. Confirm against
% the volume front matter.
@inproceedings{0ccfc2abb2094814ad7648ac659838ec,
  author    = {Bazai, Sibghat Ullah and Jang-Jaccard, Julian and Wang, Ruili},
  title     = {Anonymizing {k-NN} Classification on {MapReduce}},
  booktitle = {Mobile Networks and Management - 9th International Conference, MONAMI 2017, Proceedings},
  editor    = {Wen, Sheng and Hu, Jiankun and Khalil, Ibrahim and Tari, Zahir},
  series    = {Lecture Notes of the Institute for Computer Sciences, Social-Informatics and Telecommunications Engineering, LNICST},
  pages     = {364--377},
  publisher = {Springer Verlag},
  address   = {Cham},
  year      = {2018},
  isbn      = {9783319907741},
  doi       = {10.1007/978-3-319-90775-8_29},
  language  = {English},
  keywords  = {Data anonymization, K-anonymity, k-NN classification, MapReduce},
  abstract  = {Data analytics scenario such as a classification algorithm plays an important role in data mining to identify a category of a new observation and is often used to drive new knowledge. However, classification algorithm on a big data analytics platform such as MapReduce and Spark, often runs on plain text without an appropriate privacy protection mechanism. This leaves user{\textquoteright}s data to be vulnerable from unauthorized access and puts the data at a great privacy risk. To address such concern, we propose a new novel k-NN classifier which can run on an anonymized dataset on MapReduce platform. We describe new Map and Reduce algorithms to produce different anonymized datasets for k-NN classifier. We also illustrate the details of experiments we performed on the multiple anonymized data sets to understand the effects between the level of privacy protection (data privacy) and the high-value insights (data utility) trade-off before and after data anonymization.},
  note      = {Publisher Copyright: {\textcopyright} ICST Institute for Computer Sciences, Social Informatics and Telecommunications Engineering 2018.; 9th International Conference on Mobile Networks and Management, MONAMI 2017 ; Conference date: 13-12-2017 Through 15-12-2017},
}