@inproceedings{10.1145/3133956.3134061,
author = {Chen, Hao and Laine, Kim and Rindal, Peter},
title = {Fast Private Set Intersection from Homomorphic Encryption},
year = {2017},
isbn = {9781450349468},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3133956.3134061},
doi = {10.1145/3133956.3134061},
booktitle = {Proceedings of the 2017 ACM SIGSAC Conference on Computer and Communications Security},
pages = {1243–1255},
numpages = {13},
keywords = {private set intersection, fully homomorphic encryption},
location = {Dallas, Texas, USA},
series = {CCS ’17}
}
Private Set Intersection (PSI) is a cryptographic technique that allows two parties to compute the intersection of their sets without revealing anything except the intersection. We use fully homomorphic encryption to construct a fast PSI protocol with a small communication overhead that works particularly well when one of the two sets is much smaller than the other, and is secure against semi-honest adversaries.
The most computationally efficient PSI protocols have been constructed using tools such as hash functions and oblivious transfer, but a potential limitation with these approaches is the communication complexity, which scales linearly with the size of the larger set. This is of particular concern when performing PSI between a constrained device (cellphone) holding a small set, and a large service provider (e.g. WhatsApp), such as in the Private Contact Discovery application.
Our protocol has communication complexity linear in the size of the smaller set, and logarithmic in the larger set. More precisely, if the set sizes are Ny < Nx, we achieve a communication overhead of O(Ny log Nx). Our running-time-optimized benchmarks show that it takes 36 seconds of online-computation, 71 seconds of non-interactive (receiver-independent) pre-processing, and only 12.5MB of round trip communication to intersect five thousand 32-bit strings with 16 million 32-bit strings. Compared to prior works, this is roughly a 38–115x reduction in communication with minimal difference in computational overhead.