%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/

%% Created for sartina at 2021-10-12 10:51:55 +0200

%% Saved with string encoding Unicode (UTF-8)

%% Editing conventions: write page ranges with `--`, months as the unquoted
%% three-letter macros (jan..dec), and wrap words in titles whose
%% capitalisation must survive style recasing in braces.

@inproceedings{genius:16ccs,
  abstract = {Because of rampant security breaches in IoT devices, searching vulnerabilities in massive IoT ecosystems is more crucial than ever. Recent studies have demonstrated that control-flow graph (CFG) based bug search techniques can be effective and accurate in IoT devices across different architectures. However, these CFG-based bug search approaches are far from being scalable to handle an enormous amount of IoT devices in the wild, due to their expensive graph matching overhead. Inspired by rich experience in image and video search, we propose a new bug search scheme which addresses the scalability challenge in existing cross-platform bug search techniques and further improves search accuracy. Unlike existing techniques that directly conduct searches based upon raw features (CFGs) from the binary code, we convert the CFGs into high-level numeric feature vectors. Compared with the CFG feature, high-level numeric feature vectors are more robust to code variation across different architectures, and can easily achieve realtime search by using state-of-the-art hashing techniques. We have implemented a bug search engine, Genius, and compared it with state-of-art bug search approaches. Experimental results show that Genius outperforms baseline approaches for various query loads in terms of speed and accuracy. We also evaluated Genius on a real-world dataset of 33,045 devices which was collected from public sources and our system. The experiment showed that Genius can finish a search within 1 second on average when performed over 8,126 firmware images of 420,558,702 functions. By only looking at the top 50 candidates in the search result, we found 38 potentially vulnerable firmware images across 5 vendors, and confirmed 23 of them by our manual analysis. We also found that it took only 0.1 seconds on average to finish searching for all 154 vulnerabilities in two latest commercial firmware images from D-LINK. 103 of them are potentially vulnerable in these images, and 16 of them were confirmed.},
  address = {New York, NY, USA},
  author = {Feng, Qian and Zhou, Rundong and Xu, Chengcheng and Cheng, Yao and Testa, Brian and Yin, Heng},
  booktitle = {Proc. of the ACM SIGSAC Conference on Computer and Communications Security (CCS)},
  date-added = {2021-10-12 10:51:27 +0200},
  date-modified = {2021-10-12 10:51:55 +0200},
  doi = {10.1145/2976749.2978370},
  isbn = {9781450341394},
  keywords = {graph encoding, machine learning, firmware security},
  location = {Vienna, Austria},
  numpages = {12},
  pages = {480--491},
  publisher = {Association for Computing Machinery},
  series = {CCS '16},
  title = {Scalable Graph-Based Bug Search for Firmware Images},
  url = {https://doi.org/10.1145/2976749.2978370},
  year = {2016},
  note = {Source code: \url{https://github.com/qian-feng/Gencoding}},
  bdsk-url-1 = {https://doi.org/10.1145/2976749.2978370}}

@inproceedings{cacompare17ICPC,
  author = {Hu, Yikun and Zhang, Yuanyuan and Li, Juanru and Gu, Dawu},
  booktitle = {Proc. of the IEEE/ACM International Conference on Program Comprehension (ICPC)},
  title = {Binary Code Clone Detection across Architectures and Compiling Configurations},
  year = {2017},
  pages = {88--98},
  doi = {10.1109/ICPC.2017.22}}

@inproceedings{bingo16fse,
  author = {Chandramohan, Mahinthan and Xue, Yinxing and Xu, Zhengzi and Liu, Yang and Cho, Chia Yuan and Tan, Hee Beng Kuan},
  title = {{BinGo}: Cross-Architecture Cross-{OS} Binary Search},
  year = {2016},
  isbn = {9781450342186},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/2950290.2950350},
  doi = {10.1145/2950290.2950350},
  booktitle = {Proc. of the ACM SIGSOFT International Symposium on Foundations of Software Engineering (FSE)},
  pages = {678--689},
  numpages = {12},
  keywords = {Binary Code Searching, Vulnerability Matching},
  location = {Seattle, WA, USA},
  series = {FSE 2016}}

@article{bingoe19,
  author = {Xue, Yinxing and Xu, Zhengzi and Chandramohan, Mahinthan and Liu, Yang},
  journal = {IEEE Transactions on Software Engineering},
  title = {Accurate and Scalable Cross-Architecture Cross-{OS} Binary Code Search with Emulation},
  year = {2019},
  volume = {45},
  number = {11},
  pages = {1125--1149},
  doi = {10.1109/TSE.2018.2827379}}

@inproceedings{gemini17ccs,
  author = {Xiaojun Xu and Chang Liu and Qian Feng and Heng Yin and Le Song and Dawn Song},
  editor = {Bhavani M. Thuraisingham and David Evans and Tal Malkin and Dongyan Xu},
  title = {Neural Network-based Graph Embedding for Cross-Platform Binary Code Similarity Detection},
  booktitle = {Proc. of the {ACM} {SIGSAC} Conference on Computer and Communications Security (CCS)},
  pages = {363--376},
  publisher = {{ACM}},
  year = {2017},
  url = {https://doi.org/10.1145/3133956.3134018},
  doi = {10.1145/3133956.3134018},
  timestamp = {Tue, 10 Nov 2020 19:59:50 +0100},
  biburl = {https://dblp.org/rec/conf/ccs/XuLFYSS17.bib},
  note = {Source code: \url{https://github.com/xiaojunxu/dnn-binary-code-similarity}},
  bibsource = {dblp computer science bibliography, https://dblp.org}}

@inproceedings{bindnn17securecomm,
  abstract = {Determining if two functions taken from different compiled binaries originate from the same function in the source code has many applications to malware reverse engineering. Namely, this process allows an analyst to filter large swaths of code, removing functions that have been previously observed or those that originate in shared or trusted libraries. However, this task is challenging due to the myriad factors that influence the translation between source code and assembly instructions---the instruction stream created by a compiler is heavily influenced by a number of factors including optimizations, target platforms, and runtime constraints. In this paper, we seek to advance methods for reliably testing the equivalence of functions found in different executables. By leveraging advances in deep learning and natural language processing, we design and evaluate a novel algorithm, BinDNN, that is resilient to variations in compiler, compiler optimization level, and architecture. We show that BinDNN is effective both in isolation or in conjunction with existing approaches. In the case of the latter, we boost performance by 109{\%} when combining BinDNN with BinDiff to compare functions across architectures. This result---an improvement of 32{\%} for BinDNN and 185{\%} for BinDiff---demonstrates the utility of employing multiple orthogonal approaches to function matching.},
  address = {Cham},
  author = {Lageman, Nathaniel and Kilmer, Eric D. and Walls, Robert J. and McDaniel, Patrick D.},
  booktitle = {Proc. of the International Conference on Security and Privacy in Communication Systems (SecureComm)},
  date-added = {2021-10-12 10:45:53 +0200},
  date-modified = {2021-10-12 10:46:16 +0200},
  editor = {Deng, Robert and Weng, Jian and Ren, Kui and Yegneswaran, Vinod},
  isbn = {978-3-319-59608-2},
  pages = {517--537},
  publisher = {Springer International Publishing},
  title = {{BinDNN}: Resilient Function Matching Using Deep Learning},
  year = {2017}}

@inproceedings{discovre16,
  author = {Sebastian Eschweiler and Khaled Yakdan and Elmar Gerhards-Padilla},
  booktitle = {Proc. of the Annual Network and Distributed System Security Symposium (NDSS)},
  date-added = {2021-10-12 10:40:16 +0200},
  date-modified = {2021-10-12 10:40:59 +0200},
  doi = {10.14722/ndss.2016.23185},
  isbn = {1-891562-41-X},
  pages = {1--15},
  publisher = {Internet Society},
  title = {{discovRE}: Efficient Cross-Architecture Identification of Bugs in Binary Code},
  year = {2016},
  bdsk-url-1 = {https://doi.org/10.14722/ndss.2016.23185}}

@article{leedexofuzzy,
  author = {Lee, Shinho and Jung, Wookhyun and Kim, Sangwon and Lee, Jihyun and Kim, Jun-Seob},
  date-added = {2021-10-12 10:27:56 +0200},
  date-modified = {2021-10-12 10:28:38 +0200},
  journal = {Virus Bulletin},
  note = {Source code: \url{https://github.com/lee1029ng/Dexofuzzy}},
  title = {{Dexofuzzy}: {Android} Malware Similarity Clustering Method using Opcode Sequence},
  year = {2019}}

@inproceedings{droidegle15wisec,
  abstract = {Repackaged malware and phishing malware consist 86{\%} [35] of all Android malware, and they significantly affect the Android ecosystem. Previous work use disassembled Dalvik bytecode and hashing approaches to detect repackaged malware, but these approaches are vulnerable to obfuscation attacks and they demand large computational resources on mobile devices. In this work, we propose a novel methodology which uses the layout resources within an app to detect apps which are "visually similar", a common characteristic in repackaged apps and phishing malware. To detect visually similar apps, we design and implement DroidEagle which consists of two sub-systems: RepoEagle and HostEagle. RepoEagle is to perform large scale detection on apps repositories (e.g., apps markets), and HostEagle is a lightweight mobile app which can help users to quickly detect visually similar Android app upon download. We demonstrate the high accuracy and efficiency of DroidEagle: Within 3 hours RepoEagle can detect 1298 visually similar apps from 99 626 apps in a repository. In less than one second, HostEagle can help an Android user to determine whether a downloaded mobile app is a repackaged apps or a phishing malware. This is the first work which provides both speed and scalability in discovering repackaged apps and phishing malware in Android system.},
  address = {New York, NY, USA},
  articleno = {9},
  author = {Sun, Mingshen and Li, Mengmeng and Lui, John C. S.},
  booktitle = {Proc. of the ACM Conference on Security and Privacy in Wireless and Mobile Networks (WiSec)},
  date-added = {2021-10-12 10:25:13 +0200},
  date-modified = {2021-10-12 10:25:24 +0200},
  doi = {10.1145/2766498.2766508},
  isbn = {9781450336239},
  location = {New York, New York},
  numpages = {12},
  publisher = {Association for Computing Machinery},
  series = {WiSec '15},
  title = {{DroidEagle}: Seamless Detection of Visually Similar {Android} Apps},
  url = {https://doi.org/10.1145/2766498.2766508},
  year = {2015},
  bdsk-url-1 = {https://doi.org/10.1145/2766498.2766508}}

@inproceedings{ieeespro2015-JunodRWM,
  author = {Pascal Junod and Julien Rinaldini and Johan Wehrli and Julie Michielin},
  booktitle = {Proc. of the {IEEE/ACM} International Workshop on Software Protection (SPRO)},
  date-added = {2021-10-12 07:37:32 +0200},
  date-modified = {2021-10-12 07:37:48 +0200},
  doi = {10.1109/SPRO.2015.10},
  editor = {Brecht Wyseur},
  pages = {3--9},
  publisher = {IEEE},
  title = {Obfuscator-{LLVM} -- Software Protection for the Masses},
  year = {2015},
  bdsk-url-1 = {https://doi.org/10.1109/SPRO.2015.10}}

@article{stardroid16acm,
  abstract = {The security research community has invested significant effort in improving the security of Android applications over the past half decade. This effort has addressed a wide range of problems and resulted in the creation of many tools for application analysis. In this article, we perform the first systematization of Android security research that analyzes applications, characterizing the work published in more than 17 top venues since 2010. We categorize each paper by the types of problems they solve, highlight areas that have received the most attention, and note whether tools were ever publicly released for each effort. Of the released tools, we then evaluate a representative sample to determine how well application developers can apply the results of our community's efforts to improve their products. We find not only that significant work remains to be done in terms of research coverage but also that the tools suffer from significant issues ranging from lack of maintenance to the inability to produce functional output for applications with known vulnerabilities. We close by offering suggestions on how the community can more successfully move forward.},
  address = {New York, NY, USA},
  articleno = {55},
  author = {Reaves, Bradley and Bowers, Jasmine and Gorski, III, Sigmund Albert and Anise, Olabode and Bobhate, Rahul and Cho, Raymond and Das, Hiranava and Hussain, Sharique and Karachiwala, Hamza and Scaife, Nolen and Wright, Byron and Butler, Kevin and Enck, William and Traynor, Patrick},
  date-added = {2021-10-11 23:41:02 +0200},
  date-modified = {2021-10-11 23:41:08 +0200},
  doi = {10.1145/2996358},
  issn = {0360-0300},
  issue_date = {December 2016},
  journal = {ACM Comput. Surv.},
  keywords = {program analysis, application security, Android},
  month = oct,
  number = {3},
  numpages = {30},
  publisher = {Association for Computing Machinery},
  title = {*droid: Assessment and Evaluation of {Android} Application Analysis Tools},
  volume = {49},
  year = {2016},
  bdsk-url-1 = {https://doi.org/10.1145/2996358}}

@inproceedings{repodroid18fse,
  abstract = {In recent years, researchers have developed a number of tools to conduct taint analysis of Android applications. While all the respective papers aim at providing a thorough empirical evaluation, comparability is hindered by varying or unclear evaluation targets. Sometimes, the apps used for evaluation are not precisely described. In other cases, authors use an established benchmark but cover it only partially. In yet other cases, the evaluations differ in terms of the data leaks searched for, or lack a ground truth to compare against. All those limitations make it impossible to truly compare the tools based on those published evaluations. We thus present ReproDroid, a framework allowing the accurate comparison of Android taint analysis tools. ReproDroid supports researchers in inferring the ground truth for data leaks in apps, in automatically applying tools to benchmarks, and in evaluating the obtained results. We use ReproDroid to comparatively evaluate on equal grounds the six prominent taint analysis tools Amandroid, DIALDroid, DidFail, DroidSafe, FlowDroid and IccTA. The results are largely positive although four tools violate some promises concerning features and accuracy. Finally, we contribute to the area of unbiased benchmarking with a new and improved version of the open test suite DroidBench.},
  address = {New York, NY, USA},
  author = {Pauck, Felix and Bodden, Eric and Wehrheim, Heike},
  booktitle = {Proc. of the ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering (ESEC/FSE)},
  date-added = {2021-10-11 23:37:12 +0200},
  date-modified = {2021-10-11 23:37:32 +0200},
  doi = {10.1145/3236024.3236029},
  isbn = {9781450355735},
  keywords = {Empirical Studies, Benchmarks, Reproducibility, Android Taint Analysis, Tools},
  location = {Lake Buena Vista, FL, USA},
  numpages = {11},
  pages = {331--341},
  publisher = {Association for Computing Machinery},
  series = {ESEC/FSE 2018},
  title = {Do {Android} Taint Analysis Tools Keep Their Promises?},
  url = {https://doi.org/10.1145/3236024.3236029},
  year = {2018},
  bdsk-url-1 = {https://doi.org/10.1145/3236024.3236029}}

@misc{google:art,
  author = {Google},
  date-added = {2021-10-11 21:04:42 +0200},
  date-modified = {2021-10-11 21:06:37 +0200},
  howpublished = {\url{https://source.android.com/devices/tech/dalvik/configure}},
  title = {Configuring {ART}},
  year = {2021}}

@misc{google:artprofiles,
  author = {Google},
  date-added = {2021-10-11 21:00:46 +0200},
  date-modified = {2021-10-11 21:01:15 +0200},
  howpublished = {\url{https://android-developers.googleblog.com/2019/04/improving-app-performance-with-art.html}},
  title = {Improving app performance with {ART} optimizing profiles in the cloud},
  year = {2019}}

@misc{google:jackjill,
  author = {Google},
  date-added = {2021-10-11 20:37:19 +0200},
  date-modified = {2021-10-11 20:37:19 +0200},
  howpublished = {\url{http://tools.android.com/tech-docs/jackandjill}},
  title = {Experimental New {Android} Tool Chain - {Jack} and {Jill}},
  year = {2017}}

@inproceedings{packware:ndss20,
  author = {Hojjat Aghakhani and Fabio Gritti and Francesco Mecca and Martina Lindorfer and Stefano Ortolani and Davide Balzarotti and Giovanni Vigna and Christopher Kruegel},
  booktitle = {Proc. of the Network and Distributed System Security Symposium (NDSS)},
  date-added = {2021-10-11 17:18:17 +0200},
  date-modified = {2021-10-11 17:18:17 +0200},
  title = {When Malware is Packin' Heat; Limits of Machine Learning Classifiers Based on Static Analysis Features},
  year = {2020}}

@misc{allatori,
  author = {Allatori},
  date-added = {2021-10-11 15:08:53 +0200},
  date-modified = {2021-10-11 15:09:47 +0200},
  howpublished = {\url{http://www.allatori.com}},
  title = {{Allatori} {Java} Obfuscator},
  year = {2021}}

@misc{dasho,
  author = {PreEmptive},
  date-added = {2021-10-11 15:07:22 +0200},
  date-modified = {2021-10-11 15:08:23 +0200},
  howpublished = {\url{https://www.preemptive.com/products/dasho/}},
  title = {{DashO}: Professional-grade Application Protection},
  year = {2021}}

@misc{dexguard,
  author = {Guardsquare},
  date-added = {2021-10-11 15:05:16 +0200},
  date-modified = {2021-10-11 15:06:26 +0200},
  howpublished = {\url{https://www.guardsquare.com/dexguard}},
  title = {{DexGuard}: Full spectrum protection for {Android} apps},
  year = {2021}}

@misc{lief:oat,
  author = {Romain Thomas},
  date-added = {2021-10-11 14:40:41 +0200},
  date-modified = {2021-10-11 14:41:33 +0200},
  howpublished = {\url{https://lief-project.github.io/doc/latest/tutorials/10_android_formats.html}},
  title = {{LIEF} Documentation: {Android} Formats},
  year = {2021}}

@misc{google:ndk,
  author = {Google},
  date-added = {2021-10-11 13:57:57 +0200},
  date-modified = {2021-10-11 20:38:24 +0200},
  howpublished = {\url{https://developer.android.com/ndk/guides}},
  title = {Get started with the {NDK}},
  year = {2021}}

@inproceedings{orchoser19compsac,
  author = {Peng, Yanru and Chen, Yuting and Shen, Beijun},
  booktitle = {Proc. of the IEEE Annual Computer Software and Applications Conference (COMPSAC)},
  date-added = {2021-10-11 12:06:47 +0200},
  date-modified = {2021-10-11 12:07:15 +0200},
  doi = {10.1109/COMPSAC.2019.00023},
  pages = {97--106},
  title = {An Adaptive Approach to Recommending Obfuscation Rules for {Java} Bytecode Obfuscators},
  volume = {1},
  year = {2019},
  bdsk-url-1 = {https://doi.org/10.1109/COMPSAC.2019.00023}}

@inproceedings{OSSPolice17ccs,
  address = {New York, NY, USA},
  author = {Duan, Ruian and Bijlani, Ashish and Xu, Meng and Kim, Taesoo and Lee, Wenke},
  booktitle = {Proc. of the ACM SIGSAC Conference on Computer and Communications Security (CCS)},
  date-added = {2021-10-11 10:31:00 +0200},
  date-modified = {2021-10-11 10:31:16 +0200},
  doi = {10.1145/3133956.3134048},
  isbn = {9781450349468},
  keywords = {license violation, code clone detection, application security},
  location = {Dallas, Texas, USA},
  numpages = {17},
  pages = {2169--2185},
  publisher = {Association for Computing Machinery},
  series = {CCS '17},
  title = {Identifying Open-Source License Violation and 1-Day Security Risk at Large Scale},
  url = {https://doi.org/10.1145/3133956.3134048},
  year = {2017},
  bdsk-url-1 = {https://doi.org/10.1145/3133956.3134048}}

@inproceedings{pmls20wisec,
  address = {New York, NY, USA},
  author = {Zhang, Zicheng and Diao, Wenrui and Hu, Chengyu and Guo, Shanqing and Zuo, Chaoshun and Li, Li},
  booktitle = {Proc. of the ACM Conference on Security and Privacy in Wireless and Mobile Networks (WiSec)},
  date-added = {2021-10-11 10:27:08 +0200},
  date-modified = {2021-10-11 10:27:30 +0200},
  doi = {10.1145/3395351.3399346},
  isbn = {9781450380065},
  keywords = {malware, Android apps, malicious third-party libraries},
  location = {Linz, Austria},
  numpages = {11},
  pages = {144--154},
  publisher = {Association for Computing Machinery},
  series = {WiSec '20},
  title = {An Empirical Study of Potentially Malicious Third-Party Libraries in {Android} Apps},
  url = {https://doi.org/10.1145/3395351.3399346},
  year = {2020},
  bdsk-url-1 = {https://doi.org/10.1145/3395351.3399346}}

@inproceedings{saturn2019spro,
  address = {New York, NY, USA},
  author = {Garba, Peter and Favaro, Matteo},
  booktitle = {Proc. of the ACM Workshop on Software Protection (SPRO)},
  date-added = {2021-10-10 15:27:33 +0200},
  date-modified = {2021-10-10 15:27:47 +0200},
  doi = {10.1145/3338503.3357721},
  isbn = {9781450368353},
  keywords = {llvm, binary rewriting, obfuscation, deobfuscation, binary recompilation, code lifting, reverse engineering, static software analysis},
  location = {London, United Kingdom},
  numpages = {12},
  pages = {27--38},
  publisher = {Association for Computing Machinery},
  series = {SPRO'19},
  title = {{SATURN} - Software Deobfuscation Framework Based On {LLVM}},
  url = {https://doi.org/10.1145/3338503.3357721},
  year = {2019},
  bdsk-url-1 = {https://doi.org/10.1145/3338503.3357721}}

@inproceedings{bintuner21pldi,
  address = {New York, NY, USA},
  author = {Ren, Xiaolei and Ho, Michael and Ming, Jiang and Lei, Yu and Li, Li},
  booktitle = {Proc. of the ACM SIGPLAN International Conference on Programming Language Design and Implementation (PLDI)},
  date-added = {2021-10-10 15:07:09 +0200},
  date-modified = {2021-10-10 15:07:22 +0200},
  doi = {10.1145/3453483.3454035},
  isbn = {9781450383912},
  keywords = {Binary Code Difference, Compiler Optimization},
  location = {Virtual, Canada},
  numpages = {16},
  pages = {142--157},
  publisher = {Association for Computing Machinery},
  series = {PLDI 2021},
  title = {Unleashing the Hidden Power of Compiler Optimization on Binary Code Difference: An Empirical Study},
  url = {https://doi.org/10.1145/3453483.3454035},
  year = {2021},
  bdsk-url-1 = {https://doi.org/10.1145/3453483.3454035}}

@inproceedings{binrec20eurosys,
  address = {New York, NY, USA},
  articleno = {36},
  author = {Altinay, Anil and Nash, Joseph and Kroes, Taddeus and Rajasekaran, Prabhu and Zhou, Dixin and Dabrowski, Adrian and Gens, David and Na, Yeoul and Volckaert, Stijn and Giuffrida, Cristiano and Bos, Herbert and Franz, Michael},
  booktitle = {Proc. of the European Conference on Computer Systems (EuroSys)},
  date-added = {2021-10-10 14:36:38 +0200},
  date-modified = {2021-10-10 14:36:48 +0200},
  doi = {10.1145/3342195.3387550},
  isbn = {9781450368827},
  location = {Heraklion, Greece},
  numpages = {16},
  publisher = {Association for Computing Machinery},
  series = {EuroSys '20},
  title = {{BinRec}: Dynamic Binary Lifting and Recompilation},
  url = {https://doi.org/10.1145/3342195.3387550},
  year = {2020},
  bdsk-url-1 = {https://doi.org/10.1145/3342195.3387550}}

@misc{app-bundles,
  author = {Google},
  date-added = {2021-10-10 12:44:41 +0200},
  date-modified = {2021-10-10 12:45:37 +0200},
  howpublished = {\url{https://developer.android.com/guide/app-bundle}},
  title = {About {Android App Bundles}},
  year = {2021}}

@inproceedings{libscout16ccs,
  address = {New York, NY, USA},
  author = {Backes, Michael and Bugiel, Sven and Derr, Erik},
  booktitle = {Proc. of the ACM SIGSAC Conference on Computer and Communications Security (CCS)},
  date-added = {2021-10-10 12:15:23 +0200},
  date-modified = {2021-10-10 12:15:34 +0200},
  doi = {10.1145/2976749.2978333},
  isbn = {9781450341394},
  keywords = {android, third-party library detection},
  location = {Vienna, Austria},
  numpages = {12},
  pages = {356--367},
  publisher = {Association for Computing Machinery},
  series = {CCS '16},
  title = {Reliable Third-Party Library Detection in {Android} and Its Security Applications},
  url = {https://doi.org/10.1145/2976749.2978333},
  year = {2016},
  bdsk-url-1 = {https://doi.org/10.1145/2976749.2978333}}

@inproceedings{centroid14icse,
  address = {New York, NY, USA},
  author = {Chen, Kai and Liu, Peng and Zhang, Yingjun},
  booktitle = {Proc. of the International Conference on Software Engineering (ICSE)},
  date-added = {2021-10-10 12:12:20 +0200},
  date-modified = {2021-10-10 12:12:56 +0200},
  doi = {10.1145/2568225.2568286},
  isbn = {9781450327565},
  keywords = {centroid, clone detection, Android, Software analysis},
  location = {Hyderabad, India},
  numpages = {12},
  pages = {175--186},
  publisher = {Association for Computing Machinery},
  series = {ICSE 2014},
  title = {Achieving Accuracy and Scalability Simultaneously in Detecting Application Clones on {Android} Markets},
  url = {https://doi.org/10.1145/2568225.2568286},
  year = {2014},
  bdsk-url-1 = {https://doi.org/10.1145/2568225.2568286}}

@inproceedings{droidsim14ifip,
  abstract = {Recently smartphones and mobile devices have gained incredible popularity for their vibrant feature-rich applications (or apps). Because it is easy to repackage Android apps, software plagiarism has become a serious problem. In this paper, we present an accurate and robust system DroidSim to detect code reuse. DroidSim calculates similarity score only with component-based control flow graph (CB-CFG). CB-CFG is a graph of which nodes are Android APIs and edges represent control flow precedence order in each Android component. Our system can be applied to detect repackaged apps and malware variants. We evaluate DroidSim on 121 apps and 706 malware variants. The results show that our system has no false negative and a false positive of 0.83{\%} for repackaged apps, and a detection ratio of 96.60{\%} for malware variants. Besides, ADAM is used to obfuscate apps and the result reveals that ADAM has no influence on our system.},
  address = {Berlin, Heidelberg},
  author = {Sun, Xin and Zhongyang, Yibing and Xin, Zhi and Mao, Bing and Xie, Li},
  booktitle = {Proc. of the IFIP International Conference on Systems Security and Privacy Protection (IFIP SEC)},
  date-added = {2021-10-10 12:07:56 +0200},
  date-modified = {2021-10-10 12:11:00 +0200},
  editor = {Cuppens-Boulahia, Nora and Cuppens, Fr{\'e}d{\'e}ric and Jajodia, Sushil and Abou El Kalam, Anas and Sans, Thierry},
  isbn = {978-3-642-55415-5},
  pages = {142--155},
  publisher = {Springer Berlin Heidelberg},
  title = {Detecting Code Reuse in {Android} Applications Using Component-Based Control Flow Graph},
  year = {2014}}

@inproceedings{elsim12hicss,
  author = {Desnos, Anthony},
  booktitle = {Proc. of the Hawaii International Conference on System Sciences (HICSS)},
  date-added = {2021-10-10 12:04:36 +0200},
  date-modified = {2021-10-10 12:05:48 +0200},
  doi = {10.1109/HICSS.2012.114},
  pages = {5394--5403},
  title = {{Android}: Static Analysis Using Similarity Distance},
  year = {2012},
  bdsk-url-1 = {https://doi.org/10.1109/HICSS.2012.114}}

@inproceedings{andradar:dimva14,
  author = {Lindorfer, Martina and Volanis, Stamatis and Sisto, Alessandro and Neugschwandtner, Matthias and Athanasopoulos, Elias and Maggi, Federico and Platzer, Christian and Zanero, Stefano and Ioannidis, Sotiris},
  booktitle = {Proc. of the Conference on Detection of Intrusions and Malware \& Vulnerability Assessment (DIMVA)},
  date-added = {2021-10-10 12:04:06 +0200},
  date-modified = {2021-10-10 12:04:06 +0200},
  title = {{AndRadar}: Fast Discovery of {Android} Applications in Alternative Markets},
  year = {2014}}

@inproceedings{libraries20ase,
  author = {Zhan, Xian and Fan, Lingling and Liu, Tianming and Chen, Sen and Li, Li and Wang, Haoyu and Xu, Yifei and Luo, Xiapu and Liu, Yang},
  booktitle = {Proc. of the IEEE/ACM International Conference on Automated Software Engineering (ASE)},
  date-added = {2021-10-10 11:56:03 +0200},
  date-modified = {2021-10-10 11:56:16 +0200},
  pages = {919--930},
  title = {Automated Third-Party Library Detection for {Android} Applications: Are We There Yet?},
  year = {2020}}

@inproceedings{codematch17fse,
  address = {New York, NY, USA},
  author = {Glanz, Leonid and Amann, Sven and Eichberg, Michael and Reif, Michael and Hermann, Ben and Lerch, Johannes and Mezini, Mira},
  booktitle = {Proc. of the Joint Meeting on Foundations of Software Engineering (ESEC/FSE)},
  date-added = {2021-10-10 11:43:39 +0200},
  date-modified = {2021-10-10 12:31:42 +0200},
  doi = {10.1145/3106237.3106305},
  isbn = {9781450351058},
  keywords = {obfuscation, library detection, code analysis, repackage detection},
  location = {Paderborn, Germany},
  numpages = {11},
  pages = {638--648},
  publisher = {Association for Computing Machinery},
  series = {ESEC/FSE 2017},
  title = {{CodeMatch}: Obfuscation Won't Conceal Your Repackaged App},
  url = {https://doi.org/10.1145/3106237.3106305},
  year = {2017},
  bdsk-url-1 = {https://doi.org/10.1145/3106237.3106305}}

@misc{google:kotlin,
  author = {Google},
  howpublished = {\url{https://developer.android.com/kotlin/first}},
  title = {{Android}'s {Kotlin}-first approach},
  year = {2021}}

@misc{proguardvsv8,
  author = {Guardsquare},
  howpublished = {\url{https://www.guardsquare.com/blog/proguard-and-r8}},
  month = jul,
  title = {{ProGuard} and {R8}: Comparing Optimizers},
  year = {2018}}
@misc{proguardvsv8new,
  author = {Guardsquare},
  howpublished = {\url{https://www.guardsquare.com/blog/comparison-proguard-vs-r8-october-2019-edition}},
  month = oct,
  title = {Comparison of ProGuard vs. R8: October 2019 edition},
  year = {2019}
}

@misc{google:lvlapps,
  author = {Google},
  howpublished = {\url{https://android-developers.googleblog.com/2010/09/securing-android-lvl-applications.html}},
  title = {Securing Android LVL Applications},
  year = {2010}
}

@misc{google:r8default,
  author = {Google},
  date-added = {2021-10-08 16:58:42 +0200},
  date-modified = {2021-10-08 16:59:59 +0200},
  howpublished = {\url{https://developer.android.com/studio/releases/gradle-plugin\#3-4-0}},
  title = {Android Gradle plugin release notes: 3.4.0 (April 2019)},
  year = {2021}
}

@misc{google:gradle,
  author = {Google},
  howpublished = {\url{https://developer.android.com/studio/build}},
  title = {Configure your build},
  year = {2021}
}

@misc{google:android2.3,
  author = {Google},
  date-added = {2021-10-08 16:44:35 +0200},
  date-modified = {2021-10-08 16:46:25 +0200},
  howpublished = {\url{https://android-developers.googleblog.com/2010/12/android-23-platform-and-updated-sdk.html}},
  month = dec,
  title = {Android 2.3 Platform and Updated SDK Tools},
  year = {2010}
}

@inproceedings{citizendeveloper18sp,
  author = {Oltrogge, Marten and Derr, Erik and Stransky, Christian and Acar, Yasemin and Fahl, Sascha and Rossow, Christian and Pellegrino, Giancarlo and Bugiel, Sven and Backes, Michael},
  booktitle = {Proc.
of the IEEE Symposium on Security and Privacy (S\&P)},
  date-added = {2021-10-08 15:43:16 +0200},
  date-modified = {2021-10-08 15:43:38 +0200},
  doi = {10.1109/SP.2018.00005},
  pages = {634--647},
  title = {The Rise of the Citizen Developer: Assessing the Security Impact of Online App Generators},
  year = {2018},
  bdsk-url-1 = {https://doi.org/10.1109/SP.2018.00005}
}

@misc{statista:total,
  author = {Statista},
  date-modified = {2021-10-08 16:00:47 +0200},
  howpublished = {\url{https://www.statista.com/statistics/266210/number-of-available-applications-in-the-google-play-store/}},
  month = sep,
  title = {Number of available applications in the Google Play Store from December 2009 to July 2021},
  urldate = {2021-10-08},
  year = {2021}
}

@misc{androidstats,
  author = {Statcounter},
  howpublished = {\url{https://gs.statcounter.com/os-market-share}},
  title = {Operating System Market Share Worldwide},
  urldate = {2021-10-08},
  year = {2021}
}

@misc{statista:month,
  author = {Statista},
  date-modified = {2021-10-08 16:00:42 +0200},
  howpublished = {\url{https://www.statista.com/statistics/1020956/android-app-releases-worldwide/}},
  month = oct,
  title = {Average number of new Android app releases via Google Play per month from March 2019 to August 2021},
  urldate = {2021-10-08},
  year = {2021}
}

@misc{aptoide,
  author = {Aptoide},
  date-added = {2021-10-08 15:33:38 +0200},
  date-modified = {2021-10-08 15:34:06 +0200},
  howpublished = {\url{https://en.aptoide.com/company/about-us}},
  title = {The game-changing alternative Android app store},
  urldate = {2021-10-08},
  year = {2021}
}

@misc{ghidra,
  author = {NSA},
  date-added = {2021-10-08 14:22:37 +0200},
  date-modified = {2021-10-08 14:22:52 +0200},
  howpublished = {\url{https://ghidra-sre.org}},
  title = {Ghidra}
}

@misc{objdump,
  date-added = {2021-10-08 14:21:26 +0200},
  date-modified = {2021-10-08 14:22:20 +0200},
  howpublished = {\url{https://linux.die.net/man/1/objdump}},
  title = {arm-linux-gnueabi-objdump}
}

@misc{binaryninja,
  date-added = {2021-10-08 14:20:31
+0200},
  date-modified = {2021-10-08 14:20:43 +0200},
  howpublished = {\url{https://binary.ninja}},
  title = {BinaryNinja}
}

@misc{idapro,
  author = {Hex-Rays},
  date-added = {2021-10-08 14:19:46 +0200},
  date-modified = {2021-10-08 14:20:21 +0200},
  howpublished = {\url{https://hex-rays.com/ida-pro/}},
  title = {IDA Pro}
}

@misc{hopper,
  date-added = {2021-10-08 14:19:24 +0200},
  date-modified = {2021-10-08 14:19:34 +0200},
  howpublished = {\url{https://www.hopperapp.com}},
  title = {Hopper}
}

@misc{radare,
  date-added = {2021-10-08 14:18:51 +0200},
  date-modified = {2021-10-08 14:19:13 +0200},
  howpublished = {\url{https://rada.re/}},
  title = {Radare2}
}

@inproceedings{angr16sp,
  author = {Shoshitaishvili, Yan and Wang, Ruoyu and Salls, Christopher and Stephens, Nick and Polino, Mario and Dutcher, Andrew and Grosen, John and Feng, Siji and Hauser, Christophe and Kruegel, Christopher and Vigna, Giovanni},
  booktitle = {Proc. of the IEEE Symposium on Security and Privacy (S\&P)},
  date-added = {2021-10-08 14:17:52 +0200},
  date-modified = {2021-10-08 14:18:13 +0200},
  doi = {10.1109/SP.2016.17},
  pages = {138--157},
  title = {SOK: (State of) The Art of War: Offensive Techniques in Binary Analysis},
  year = {2016},
  bdsk-url-1 = {https://doi.org/10.1109/SP.2016.17}
}

@inproceedings{bap11cav,
  abstract = {BAP is a publicly available infrastructure for performing program verification and analysis tasks on binary (i.e., executable) code. In this paper, we describe BAP as well as lessons learned from previous incarnations of binary analysis platforms. BAP explicitly represents all side effects of instructions in an intermediate language (IL), making syntax-directed analysis possible. We have used BAP to routinely generate and solve verification conditions that are hundreds of megabytes in size and encompass 100,000's of assembly instructions.},
  address = {Berlin, Heidelberg},
  author = {Brumley, David and Jager, Ivan and Avgerinos, Thanassis and Schwartz, Edward J.},
  booktitle = {Proc.
of the International Conference on Computer Aided Verification (CAV)},
  date-added = {2021-10-08 14:17:01 +0200},
  date-modified = {2021-10-08 14:17:26 +0200},
  isbn = {978-3-642-22110-1},
  publisher = {Springer Berlin Heidelberg},
  title = {BAP: A Binary Analysis Platform},
  year = {2011}
}

@inproceedings{nucleus:17sp,
  author = {Andriesse, Dennis and Slowinska, Asia and Bos, Herbert},
  booktitle = {Proc. of the IEEE European Symposium on Security and Privacy (EuroS\&P)},
  date-added = {2021-10-08 14:10:23 +0200},
  date-modified = {2021-10-08 14:10:36 +0200},
  doi = {10.1109/EuroSP.2017.11},
  pages = {177--189},
  title = {Compiler-Agnostic Function Detection in Binaries},
  year = {2017},
  bdsk-url-1 = {https://doi.org/10.1109/EuroSP.2017.11}
}

@misc{wikipedia:applist,
  author = {Wikipedia},
  date-added = {2021-10-08 13:57:21 +0200},
  date-modified = {2021-10-08 13:57:49 +0200},
  howpublished = {\url{https://en.wikipedia.org/wiki/List_of_free_and_open-source_Android_applications}},
  title = {List of free and open-source Android applications},
  urldate = {2021-06-07}
}

@inproceedings{androzoo2020,
  author = {Liu, Pei and Li, Li and Zhao, Yanjie and Sun, Xiaoyu and Grundy, John},
  booktitle = {Proc. of the International Conference on Mining Software Repositories (MSR)},
  date-added = {2021-10-08 13:46:02 +0200},
  date-modified = {2021-10-08 13:46:02 +0200},
  title = {AndroZooOpen: Collecting Large-Scale Open Source Android Apps for the Research Community},
  year = {2020}
}

@inproceedings{multimodal2019rajasegaran,
  address = {{San Francisco, CA, USA}},
  annotation = {ZSCC: 0000002},
  author = {Rajasegaran, Jathushan and Karunanayake, Naveen and Gunathillake, Ashanie and Seneviratne, Suranga and Jourjon, Guillaume},
  booktitle = {Proc. of the World Wide Web Conference (WWW)},
  date-added = {2021-10-08 13:45:30 +0200},
  date-modified = {2021-10-08 13:45:30 +0200},
  doi = {10.1145/3308558.3313427},
  isbn = {978-1-4503-6674-8},
  pages = {3165--3171},
  publisher = {{ACM Press}},
  title = {A {{Multi}}-Modal {{Neural Embeddings Approach}} for {{Detecting Mobile Counterfeit
Apps}}},
  year = {2019},
  bdsk-url-1 = {https://doi.org/10.1145/3308558.3313427}
}

@misc{google-play-signing,
  author = {Google},
  date-added = {2021-10-08 13:44:25 +0200},
  date-modified = {2021-10-08 13:44:33 +0200},
  howpublished = {\url{https://developer.android.com/studio/publish/app-signing}},
  title = {{Sign your app}},
  year = {2021}
}

@misc{redex,
  author = {Facebook},
  date-added = {2021-10-08 12:33:40 +0200},
  date-modified = {2021-10-08 12:34:06 +0200},
  howpublished = {\url{https://fbredex.com}},
  title = {Redex: An Android Bytecode Optimizer}
}

@misc{quarkslab:diffing3,
  author = {Tom Czayka and Romain Thomas},
  date-added = {2021-10-08 12:32:05 +0200},
  date-modified = {2021-10-08 12:32:05 +0200},
  howpublished = {\url{https://blog.quarkslab.com/android-application-diffing-analysis-of-modded-version.html}},
  month = may,
  title = {Android Application Diffing: Analysis of Modded Version},
  year = {2019}
}

@misc{quarkslab:diffing2,
  author = {Tom Czayka and Romain Thomas},
  date-added = {2021-10-08 12:31:07 +0200},
  date-modified = {2021-10-08 12:32:26 +0200},
  howpublished = {\url{https://blog.quarkslab.com/android-application-diffing-cve-2019-10875-inspection.html}},
  month = may,
  title = {Android Application Diffing: CVE-2019-10875 Inspection},
  year = {2019}
}

@article{lineage,
  author = {Ul Haq, Irfan and Chica, Sergio and Caballero, Juan and Jha, Somesh},
  title = {{Malware Lineage in the Wild}},
  journal = {Computers \& Security},
  publisher = {Elsevier},
  volume = {78},
  year = {2018},
  pages = {347--363},
  issn = {0167-4048},
  doi = {10.1016/j.cose.2018.07.012},
  jcr = {2.862},
}

@misc{quarkslab:diffing,
  author = {Tom Czayka and Romain Thomas},
  date-added = {2021-10-08 12:29:26 +0200},
  date-modified = {2021-10-08 12:30:09 +0200},
  howpublished = {\url{https://blog.quarkslab.com/android-application-diffing-engine-overview.html}},
  month = apr,
  title = {Android Application Diffing: Engine Overview},
  year = {2019}
}

@misc{r8optimizer,
  author = {Google},
  date-added = {2021-10-08 12:22:49
+0200},
  date-modified = {2021-10-08 12:23:39 +0200},
  howpublished = {\url{https://developer.android.com/studio/build/shrink-code}},
  title = {Shrink, obfuscate, and optimize your app},
  urldate = {2021-10-08},
  year = {2021}
}

@misc{proguardgithub,
  author = {Guardsquare},
  date-added = {2021-10-08 12:21:31 +0200},
  date-modified = {2021-10-11 15:05:45 +0200},
  howpublished = {\url{https://www.guardsquare.com/proguard}},
  journal = {GitHub repository},
  publisher = {GitHub},
  title = {ProGuard: Java optimizer and obfuscator},
  year = {2021}
}

@misc{proguard,
  author = {Guardsquare},
  date-added = {2021-10-08 12:19:18 +0200},
  date-modified = {2021-10-11 15:05:53 +0200},
  howpublished = {\url{https://github.com/Guardsquare/proguard}},
  title = {ProGuard: Shrink your Java and Android code},
  year = {2021}
}

@inproceedings{antiproguard2017,
  abstract = {A wide adoption of obfuscation techniques by Android application developers, and especially malware authors, introduces a high degree of complication into the process of reverse engineering, analysis, and security evaluation of third-party and potentially harmful apps. In this paper we present the early results of our research aiming to provide reliable means for automated deobfuscation of Android apps. According to the underlying approach, deobfuscation of a given app is performed by matching its code parts to the unobfuscated code stored in a database. For this purpose we apply well-known software similarity algorithms, such as SimHash and n-gram based ones. As a source of unobfuscated code can serve open source apps and libraries, as well as previously analyzed and manually deobfuscated code. Although the presented techniques are generic in their nature, our current prototype mainly targets Proguard, as one of the most widely used protection tools for Android performing primarily renaming obfuscation.
The evaluation of the presented Anti-ProGuard tool witnesses its effectiveness for the considered task and supports the feasibility of the proposed approach.},
  address = {New York, NY, USA},
  author = {Baumann, Richard and Protsenko, Mykolai and M{\"u}ller, Tilo},
  booktitle = {Proc. of the Workshop on Security in Highly Connected IT Systems (SHCIS)},
  date-added = {2021-10-08 12:17:33 +0200},
  date-modified = {2021-10-08 12:17:49 +0200},
  doi = {10.1145/3099012.3099020},
  isbn = {9781450352710},
  keywords = {Software Similarity, Android, Deobfuscation, Reverse Engineering},
  location = {Neuch{\^a}tel, Switzerland},
  numpages = {6},
  pages = {7--12},
  publisher = {Association for Computing Machinery},
  series = {SHCIS '17},
  title = {Anti-ProGuard: Towards Automated Deobfuscation of Android Apps},
  url = {https://doi.org/10.1145/3099012.3099020},
  year = {2017},
  bdsk-url-1 = {https://doi.org/10.1145/3099012.3099020}
}

@inproceedings{armdisassemblers20issta,
  abstract = {With the increasing popularity of embedded devices, ARM is becoming the dominant architecture for them. In the meanwhile, there is a pressing need to perform security assessments for these devices. Due to different types of peripherals, it is challenging to dynamically run the firmware of these devices in an emulated environment. Therefore, the static analysis is still commonly used. Existing work usually leverages off-the-shelf tools to disassemble stripped ARM binaries and (implicitly) assume that reliable disassembling binaries and function recognition are solved problems. However, whether this assumption really holds is unknown. In this paper, we conduct the first comprehensive study on ARM disassembly tools. Specifically, we build 1,896 ARM binaries (including 248 obfuscated ones) with different compilers, compiling options, and obfuscation methods.
We then evaluate them using eight state-of-the-art ARM disassembly tools (including both commercial and noncommercial ones) on their capabilities to locate instructions and function boundaries. These two are fundamental ones, which are leveraged to build other primitives. Our work reveals some observations that have not been systematically summarized and/or confirmed. For instance, we find that the existence of both ARM and Thumb instruction sets, and the reuse of the BL instruction for both function calls and branches bring serious challenges to disassembly tools. Our evaluation sheds light on the limitations of state-of-the-art disassembly tools and points out potential directions for improvement. To engage the community, we release the data set, and the related scripts at https://github.com/valour01/arm_disasssembler_study.},
  address = {New York, NY, USA},
  author = {Jiang, Muhui and Zhou, Yajin and Luo, Xiapu and Wang, Ruoyu and Liu, Yang and Ren, Kui},
  booktitle = {Proc. of the ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA)},
  date-added = {2021-10-08 12:09:48 +0200},
  date-modified = {2021-10-08 12:10:02 +0200},
  doi = {10.1145/3395363.3397377},
  isbn = {9781450380089},
  keywords = {ARM Architecture, Disassembly Tools, Empirical Study},
  location = {Virtual Event, USA},
  numpages = {14},
  pages = {401--414},
  publisher = {Association for Computing Machinery},
  series = {ISSTA 2020},
  title = {An Empirical Study on ARM Disassembly Tools},
  url = {https://doi.org/10.1145/3395363.3397377},
  year = {2020},
  bdsk-url-1 = {https://doi.org/10.1145/3395363.3397377}
}

@article{droidskynet21tdsc,
  author = {Zhang, Yue and Weng, Jian and Weng, Jiasi and Hou, Lin and Yang, Anjia and Li, Ming and Xiang, Yang and Deng, Robert H.},
  date-added = {2021-10-08 11:49:41 +0200},
  date-modified = {2021-10-08 11:49:51 +0200},
  doi = {10.1109/TDSC.2019.2914202},
  journal = {IEEE Transactions on Dependable and Secure Computing},
  number = {2},
  pages = {652--666},
  title =
{Looking Back! Using Early Versions of Android Apps as Attack Vectors},
  volume = {18},
  year = {2021},
  bdsk-url-1 = {https://doi.org/10.1109/TDSC.2019.2914202}
}

@inproceedings{bscout20usenix,
  author = {Jiarun Dai and Yuan Zhang and Zheyue Jiang and Yingtian Zhou and Junyan Chen and Xinyu Xing and Xiaohan Zhang and Xin Tan and Min Yang and Zhemin Yang},
  booktitle = {Proc. of the USENIX Security Symposium},
  date-added = {2021-10-08 11:46:24 +0200},
  date-modified = {2021-10-08 11:46:39 +0200},
  isbn = {978-1-939133-17-5},
  month = aug,
  pages = {1147--1164},
  publisher = {{USENIX} Association},
  title = {BScout: Direct Whole Patch Presence Test for Java Executables},
  url = {https://www.usenix.org/conference/usenixsecurity20/presentation/dai},
  year = {2020},
  bdsk-url-1 = {https://www.usenix.org/conference/usenixsecurity20/presentation/dai}
}

@inproceedings{kotlindetector21mobilesoft,
  address = {Los Alamitos, CA, USA},
  author = {Fadi Mohsen and Loran Oosterhaven and Fatih Turkmen},
  booktitle = {Proc. of the IEEE/ACM International Conference on Mobile Software Engineering and Systems (MobileSoft)},
  date-added = {2021-10-08 11:44:23 +0200},
  date-modified = {2021-10-08 11:45:02 +0200},
  doi = {10.1109/MobileSoft52590.2021.00018},
  keywords = {privacy;java;switches;tools;feature extraction;mobile applications;software reliability},
  pages = {84--93},
  publisher = {IEEE Computer Society},
  title = {KotlinDetector: Towards Understanding the Implications of Using Kotlin in Android Applications},
  url = {https://doi.ieeecomputersociety.org/10.1109/MobileSoft52590.2021.00018},
  year = {2021},
  bdsk-url-1 = {https://doi.ieeecomputersociety.org/10.1109/MobileSoft52590.2021.00018},
  bdsk-url-2 = {https://doi.org/10.1109/MobileSoft52590.2021.00018}
}

@inproceedings{decompilers21saner,
  author = {Mauthe, Noah and Karg{\'e}n, Ulf and Shahmehri, Nahid},
  booktitle = {Proc.
of the IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER)},
  date-added = {2021-10-08 11:42:38 +0200},
  date-modified = {2021-10-08 11:42:48 +0200},
  doi = {10.1109/SANER50967.2021.00044},
  pages = {400--410},
  title = {A Large-Scale Empirical Study of Android App Decompilation},
  year = {2021},
  bdsk-url-1 = {https://doi.org/10.1109/SANER50967.2021.00044}
}

@article{awa21tdsc,
  author = {Ma, Haoyu and Li, Shijia and Gao, Debin and Wu, Daoyuan and Jia, Qiaowen and Jia, Chunfu},
  date-added = {2021-10-08 11:40:54 +0200},
  date-modified = {2021-10-08 11:41:03 +0200},
  doi = {10.1109/TDSC.2021.3100877},
  journal = {IEEE Transactions on Dependable and Secure Computing},
  pages = {1-1},
  title = {Active Warden Attack: On the (In)Effectiveness of Android App Repackage-Proofing},
  year = {2021},
  bdsk-url-1 = {https://doi.org/10.1109/TDSC.2021.3100877}
}

@inproceedings{droidpro18trustcom,
  author = {Bao, Judong and He, Yongqiang and Wen, Weiping},
  booktitle = {Proc. of the IEEE International Conference On Trust, Security And Privacy In Computing And Communications (TrustCom)},
  date-added = {2021-10-08 11:24:02 +0200},
  date-modified = {2021-10-08 11:24:30 +0200},
  doi = {10.1109/TrustCom/BigDataSE.2018.00093},
  pages = {624--632},
  title = {DroidPro: An AOTC-Based Bytecode-Hiding Scheme for Packing the Android Applications},
  year = {2018},
  bdsk-url-1 = {https://doi.org/10.1109/TrustCom/BigDataSE.2018.00093}
}

@inproceedings{obfdetection17mobilesoft,
  author = {Wang, Yan and Rountev, Atanas},
  booktitle = {Proc. of the IEEE/ACM International Conference on Mobile Software Engineering and Systems (MOBILESoft)},
  date-added = {2021-10-08 11:21:57 +0200},
  date-modified = {2021-10-08 11:22:16 +0200},
  doi = {10.1109/MOBILESoft.2017.18},
  pages = {154--164},
  title = {Who Changed You?
Obfuscator Identification for Android},
  year = {2017},
  bdsk-url-1 = {https://doi.org/10.1109/MOBILESoft.2017.18}
}

@inproceedings{oblive19saner,
  author = {Pizzolotto, Davide and Fellin, Roberto and Ceccato, Mariano},
  booktitle = {Proc. of the IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER)},
  date-added = {2021-10-08 11:20:52 +0200},
  date-modified = {2021-10-08 11:21:03 +0200},
  doi = {10.1109/SANER.2019.8667982},
  pages = {629--633},
  title = {OBLIVE: Seamless Code Obfuscation for Java Programs and Android Apps},
  year = {2019},
  bdsk-url-1 = {https://doi.org/10.1109/SANER.2019.8667982}
}

@inproceedings{kotlinvsjava21,
  author = {Hecht, Geoffrey and Bergel, Alexandre},
  booktitle = {Proc. of the IEEE/ACM International Conference on Mobile Software Engineering and Systems (MobileSoft)},
  date-added = {2021-10-08 11:18:29 +0200},
  date-modified = {2021-10-08 11:19:40 +0200},
  doi = {10.1109/MobileSoft52590.2021.00019},
  pages = {94--98},
  title = {Quantifying the adoption of Kotlin on Android stores: Insight from the bytecode},
  year = {2021},
  bdsk-url-1 = {https://doi.org/10.1109/MobileSoft52590.2021.00019}
}

@inproceedings{libid19issta,
  address = {New York, NY, USA},
  author = {Zhang, Jiexin and Beresford, Alastair R. and Kollmann, Stephan A.},
  booktitle = {Proc.
of the ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA)},
  date-added = {2021-10-08 11:16:25 +0200},
  date-modified = {2021-10-08 11:16:39 +0200},
  doi = {10.1145/3293882.3330563},
  isbn = {9781450362245},
  keywords = {ProGuard, Obfuscation, Android, Third-party library},
  location = {Beijing, China},
  numpages = {11},
  pages = {55--65},
  publisher = {Association for Computing Machinery},
  series = {ISSTA 2019},
  title = {LibID: Reliable Identification of Obfuscated Third-Party Android Libraries},
  url = {https://doi.org/10.1145/3293882.3330563},
  year = {2019},
  bdsk-url-1 = {https://doi.org/10.1145/3293882.3330563}
}

@inproceedings{orlis18libs,
  author = {Wang, Yan and Wu, Haowei and Zhang, Hailong and Rountev, Atanas},
  booktitle = {Proc. of the IEEE/ACM International Conference on Mobile Software Engineering and Systems (MOBILESoft)},
  date-added = {2021-10-08 11:15:54 +0200},
  date-modified = {2021-10-08 11:16:06 +0200},
  pages = {13--23},
  title = {Orlis: Obfuscation-Resilient Library Detection for Android},
  year = {2018}
}

@inproceedings{zungurappjitsu,
  author = {Zungur, Onur and Bianchi, Antonio and Stringhini, Gianluca and Egele, Manuel},
  booktitle = {Proc. of the IEEE European Symposium on Security and Privacy (EuroS\&P)},
  date-added = {2021-10-08 11:12:44 +0200},
  date-modified = {2021-10-08 11:13:08 +0200},
  title = {APPJITSU: Investigating the Resiliency of Android Applications},
  year = {2021}
}

@inproceedings{haupert2018honey,
  author = {Haupert, Vincent and Maier, Dominik and Schneider, Nicolas and Kirsch, Julian and M{\"u}ller, Tilo},
  booktitle = {Proc.
of the Conference on Detection of Intrusions and Malware \& Vulnerability Assessment (DIMVA)},
  date-added = {2021-10-08 11:09:58 +0200},
  date-modified = {2021-10-08 11:10:43 +0200},
  organization = {Springer},
  pages = {69--91},
  title = {Honey, I Shrunk Your App Security: The State of Android App Hardening},
  year = {2018}
}

@article{libsurvey21tse,
  author = {Zhan, Xian and Liu, Tianming and Liu, Yepang and Liu, Yang and Li, Li and Wang, Haoyu and Luo, Xiapu},
  date-added = {2021-10-08 11:05:47 +0200},
  date-modified = {2021-10-08 11:05:56 +0200},
  doi = {10.1109/TSE.2021.3115506},
  journal = {IEEE Transactions on Software Engineering},
  pages = {1-1},
  title = {A Systematic Assessment on Android Third-party Library Detection Tools},
  year = {2021},
  bdsk-url-1 = {https://doi.org/10.1109/TSE.2021.3115506}
}

@article{packergrind,
  author = {Xue, Lei and Zhou, Hao and Luo, Xiapu and Yu, Le and Wu, Dinghao and Zhou, Yajin and Ma, Xiaobo},
  date-added = {2021-10-08 11:04:51 +0200},
  date-modified = {2021-10-08 11:04:58 +0200},
  doi = {10.1109/TSE.2020.2996433},
  journal = {IEEE Transactions on Software Engineering},
  pages = {1-1},
  title = {PackerGrind: An Adaptive Unpacking System for Android Apps},
  year = {2020},
  bdsk-url-1 = {https://doi.org/10.1109/TSE.2020.2996433}
}

@inproceedings{deguard16ccs,
  address = {New York, NY, USA},
  author = {Bichsel, Benjamin and Raychev, Veselin and Tsankov, Petar and Vechev, Martin},
  booktitle = {Proc.
of the ACM SIGSAC Conference on Computer and Communications Security (CCS)},
  date-added = {2021-10-08 11:03:26 +0200},
  date-modified = {2021-10-08 11:03:38 +0200},
  doi = {10.1145/2976749.2978422},
  isbn = {9781450341394},
  keywords = {program deobfuscation, malware inspection, reverse engineering},
  location = {Vienna, Austria},
  numpages = {13},
  pages = {343--355},
  publisher = {Association for Computing Machinery},
  series = {CCS '16},
  title = {Statistical Deobfuscation of Android Applications},
  url = {https://doi.org/10.1145/2976749.2978422},
  year = {2016},
  bdsk-url-1 = {https://doi.org/10.1145/2976749.2978422}
}

@inproceedings{Obfuscator-LLVM19icse,
  author = {Kan, Zeliang and Wang, Haoyu and Wu, Lei and Guo, Yao and Xu, Guoai},
  booktitle = {Companion Proc. of the IEEE/ACM International Conference on Software Engineering (ICSE-Companion)},
  date-added = {2021-10-08 11:00:09 +0200},
  date-modified = {2021-10-08 11:00:44 +0200},
  doi = {10.1109/ICSE-Companion.2019.00135},
  pages = {322--323},
  title = {Deobfuscating Android Native Binary Code},
  year = {2019},
  bdsk-url-1 = {https://doi.org/10.1109/ICSE-Companion.2019.00135}
}

@article{obfuscapk2020aonzo,
  author = {Simone Aonzo and Gabriel Claudiu Georgiu and Luca Verderame and Alessio Merlo},
  date-added = {2021-10-08 10:58:31 +0200},
  date-modified = {2021-10-08 10:58:31 +0200},
  doi = {10.1016/j.softx.2020.100403},
  issn = {2352-7110},
  journal = {SoftwareX},
  keywords = {Android, Obfuscation, Program analysis},
  pages = {100403},
  title = {Obfuscapk: An Open-source Black-box Obfuscation tool for Android Apps},
  note = {Source code: \url{https://github.com/ClaudiuGeorgiu/Obfuscapk}},
  volume = {11},
  year = {2020},
  bdsk-url-1 = {https://www.sciencedirect.com/science/article/pii/S2352711019302791},
  bdsk-url-2 = {https://doi.org/10.1016/j.softx.2020.100403}
}

@inproceedings{obf18acsac,
  address = {New York, NY, USA},
  author = {Wermke, Dominik and Huaman, Nicolas and Acar, Yasemin and Reaves, Bradley and Traynor, Patrick and
Fahl, Sascha},
  booktitle = {Proc. of the Annual Computer Security Applications Conference (ACSAC)},
  date-added = {2021-10-08 10:57:32 +0200},
  date-modified = {2021-10-08 10:57:42 +0200},
  doi = {10.1145/3274694.3274726},
  isbn = {9781450365697},
  keywords = {Android, User Study, Obfuscation},
  location = {San Juan, PR, USA},
  numpages = {14},
  pages = {222--235},
  publisher = {Association for Computing Machinery},
  series = {ACSAC '18},
  title = {A Large Scale Investigation of Obfuscation Use in Google Play},
  url = {https://doi.org/10.1145/3274694.3274726},
  year = {2018},
  bdsk-url-1 = {https://doi.org/10.1145/3274694.3274726}
}

@inproceedings{obf18securecomm,
  address = {Cham},
  author = {Dong, Shuaike and Li, Menghao and Diao, Wenrui and Liu, Xiangyu and Liu, Jian and Li, Zhou and Xu, Fenghao and Chen, Kai and Wang, XiaoFeng and Zhang, Kehuan},
  booktitle = {Proc. of the International Conference on Security and Privacy in Communication Systems (SecureComm)},
  date-added = {2021-10-08 10:55:53 +0200},
  date-modified = {2021-10-08 10:56:23 +0200},
  editor = {Beyah, Raheem and Chang, Bing and Li, Yingjiu and Zhu, Sencun},
  isbn = {978-3-030-01701-9},
  pages = {172--192},
  publisher = {Springer International Publishing},
  title = {Understanding Android Obfuscation Techniques: A Large-Scale Investigation in the Wild},
  year = {2018}
}

@inproceedings{droidunpack18ndss,
  author = {Duan, Yue and Zhang, Mu and Bhaskar, Abhishek and Yin, Heng and Pan, Xiaorui and Li, Tongxin and Wang, Xueqiang and Wang, XiaoFeng},
  booktitle = {Proc.
of the Network and Distributed System Security Symposium (NDSS)},
  date-added = {2021-10-08 10:50:30 +0200},
  date-modified = {2021-10-08 10:50:56 +0200},
  doi = {10.14722/ndss.2018.23303},
  month = jan,
  title = {Things You May Not Know About Android (Un)Packers: A Systematic Study based on Whole-System Emulation},
  year = {2018},
  bdsk-url-1 = {https://doi.org/10.14722/ndss.2018.23303}
}

@inproceedings{binarm18dimva,
  author = {Shirani, Paria and Collard, Leo and Agba, Basile L. and Lebel, Bernard and Debbabi, Mourad and Wang, Lingyu and Hanna, Aiman},
  editor = {Giuffrida, Cristiano and Bardin, S{\'e}bastien and Blanc, Gregory},
  title = {BinARM: Scalable and Efficient Detection of Vulnerabilities in Firmware Images of Intelligent Electronic Devices},
  booktitle = {Proc. of the Conference on Detection of Intrusions and Malware \& Vulnerability Assessment (DIMVA)},
  year = {2018},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {114--138},
  isbn = {978-3-319-93411-2}
}

@inproceedings{multimh15sp,
  author = {Pewny, Jannik and Garmany, Behrad and Gawlik, Robert and Rossow, Christian and Holz, Thorsten},
  booktitle = {Proc. of the IEEE Symposium on Security and Privacy (S\&P)},
  title = {Cross-Architecture Bug Search in Binary Executables},
  year = {2015},
  pages = {709--724},
  doi = {10.1109/SP.2015.49}
}

@inproceedings{appspear15raid,
  address = {Cham},
  author = {Yang, Wenbo and Zhang, Yuanyuan and Li, Juanru and Shu, Junliang and Li, Bodong and Hu, Wenjun and Gu, Dawu},
  booktitle = {Proc.
of the International Symposium on Recent Advances in Intrusion Detection (RAID)},
  date-added = {2021-10-08 10:49:25 +0200},
  date-modified = {2021-10-08 10:49:55 +0200},
  editor = {Bos, Herbert and Monrose, Fabian and Blanc, Gregory},
  isbn = {978-3-319-26362-5},
  pages = {359--381},
  publisher = {Springer International Publishing},
  title = {AppSpear: Bytecode Decrypting and DEX Reassembling for Packed Android Malware},
  year = {2015}
}

@inproceedings{adiff18ase,
  author = {Liu, Bingchang and Huo, Wei and Zhang, Chao and Li, Wenchao and Li, Feng and Piao, Aihua and Zou, Wei},
  title = {{$\alpha$Diff}: Cross-Version Binary Code Similarity Detection with DNN},
  year = {2018},
  isbn = {9781450359375},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3238147.3238199},
  doi = {10.1145/3238147.3238199},
  booktitle = {Proc. of the ACM/IEEE International Conference on Automated Software Engineering (ASE)},
  pages = {667--678},
  numpages = {12},
  keywords = {Code Similarity Detection, DNN},
  location = {Montpellier, France},
  series = {ASE 2018}
}

@inproceedings{binxray20issta,
  author = {Xu, Yifei and Xu, Zhengzi and Chen, Bihuan and Song, Fu and Liu, Yang and Liu, Ting},
  title = {Patch Based Vulnerability Matching for Binary Programs},
  year = {2020},
  isbn = {9781450380089},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3395363.3397361},
  doi = {10.1145/3395363.3397361},
  booktitle = {Proc. of the ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA)},
  pages = {376--387},
  numpages = {12},
  keywords = {Vulnerability Matching, Security, Binary Analysis, Patch Presence Identification},
  location = {Virtual Event, USA},
  series = {ISSTA 2020},
  internal-note = {TODO: the note field read only `Source code:' with no URL; restore it as a note once the repository link is known}
}

@inproceedings{dexhunter15esorics,
  address = {Cham},
  author = {Zhang, Yueqian and Luo, Xiapu and Yin, Haoyang},
  booktitle = {Proc.
of the European Symposium on Research in Computer Security (ESORICS)},
  date-added = {2021-10-08 10:48:21 +0200},
  date-modified = {2021-10-08 10:48:49 +0200},
  editor = {Pernul, G{\"u}nther and Ryan, Peter Y. A. and Weippl, Edgar},
  isbn = {978-3-319-24177-7},
  pages = {293--311},
  publisher = {Springer International Publishing},
  title = {DexHunter: Toward Extracting Hidden Code from Packed Android Applications},
  year = {2015}
}

@inproceedings{firmup18asplos,
  author = {Yaniv David and Nimrod Partush and Eran Yahav},
  editor = {Xipeng Shen and James Tuck and Ricardo Bianchini and Vivek Sarkar},
  title = {FirmUp: Precise Static Detection of Common Vulnerabilities in Firmware},
  booktitle = {Proc. of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)},
  pages = {392--404},
  publisher = {{ACM}},
  year = {2018},
  url = {https://doi.org/10.1145/3173162.3177157},
  doi = {10.1145/3173162.3177157},
  timestamp = {Wed, 23 Jun 2021 15:34:31 +0200},
  biburl = {https://dblp.org/rec/conf/asplos/DavidPY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{vulnseeker18ase,
  author = {Gao, Jian and Yang, Xin and Fu, Ying and Jiang, Yu and Sun, Jiaguang},
  title = {VulSeeker: A Semantic Learning Based Vulnerability Seeker for Cross-Platform Binary},
  year = {2018},
  isbn = {9781450359375},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3238147.3240480},
  doi = {10.1145/3238147.3240480},
  booktitle = {Proc.
of the ACM/IEEE International Conference on Automated Software Engineering (ASE)},
  pages = {896--899},
  numpages = {4},
  keywords = {semantic learning, vulnerability search, cross-platform binary},
  location = {Montpellier, France},
  series = {ASE 2018},
  note = {Source code: \url{https://github.com/buptsseGJ/VulSeeker}}
}

@inproceedings{rlz19bar,
  author = {Kimberly Redmond and Lannan Luo and Qiang Zeng},
  title = {A Cross-Architecture Instruction Embedding Model for Natural Language Processing-Inspired Binary Code Analysis},
  year = {2019},
  booktitle = {Proc. of the Workshop on Binary Analysis Research (BAR)},
  note = {Source code: \url{https://github.com/nlp-code-analysis/cross-arch-instr-model}}
}

@inproceedings{safe2019massarelli,
  author = {Massarelli, Luca and Di Luna, Giuseppe Antonio and Petroni, Fabio and Querzoni, Leonardo and Baldoni, Roberto},
  booktitle = {Proc. of the Conference on Detection of Intrusions and Malware \& Vulnerability Assessment (DIMVA)},
  date-modified = {2021-10-08 13:06:14 +0200},
  note = {Source code: \url{https://github.com/gadiluna/SAFE}},
  title = {SAFE: Self-Attentive Function Embeddings for Binary Similarity},
  year = {2019}
}

@inproceedings{zuo2019neural,
  title = {Neural Machine Translation Inspired Binary Code Similarity Comparison beyond Function Pairs},
  author = {Zuo, Fei and Li, Xiaopeng and Young, Patrick and Luo, Lannan and Zeng, Qiang and Zhang, Zhexin},
  booktitle = {Proc. of the Annual Network and Distributed Systems Security Symposium (NDSS)},
  note = {Artifacts: \url{https://nmt4binaries.github.io}},
  year = {2019}
}

@inproceedings{reoptimization2017david,
  address = {Barcelona, Spain},
  annotation = {ZSCC: 0000047},
  author = {David, Yaniv and Partush, Nimrod and Yahav, Eran},
  booktitle = {Proc.
of the {{ACM SIGPLAN Conference}} on {{Programming Language Design}} and {{Implementation}} (PLDI)},
  date-modified = {2021-10-11 11:13:44 +0200},
  doi           = {10.1145/3062341.3062387},
  isbn          = {978-1-4503-4988-8},
  keywords      = {status.citekeyOK},
  month         = jun,
  pages         = {79--94},
  publisher     = {{ACM}},
  title         = {Similarity of Binaries through Re-Optimization},
  year          = {2017},
  bdsk-url-1    = {https://doi.org/10.1145/3062341.3062387}
}

@article{elsim2012pouik,
  author  = {Pouik and G0rfi3ld},
  title   = {Similarities for {{Fun}} \& {{Profit}}},
  journal = {Phrack Magazine},
  number  = {68},
  month   = apr,
  year    = {2012}
}

@misc{wharton2016diffuse,
  author       = {Wharton, Jake},
  title        = {diffuse},
  howpublished = {\url{https://github.com/JakeWharton/diffuse}},
  journal      = {GitHub repository},
  publisher    = {GitHub},
  urldate      = {2021-04-20},
  year         = {2016}
}

@misc{radarediffing,
  author       = {Radare},
  title        = {Binary diffing},
  howpublished = {\url{https://radareorg.github.io/blog/posts/binary-diffing/}},
  month        = aug,
  year         = {2014},
  note         = {Source code: \url{https://github.com/radareorg/radare2/tree/master/binr/radiff2}}
}

@misc{apkid,
  author       = {{RedNaga Security}},
  title        = {Detecting Pirated and Malicious Android Apps with APKiD},
  howpublished = {\url{https://rednaga.io/2016/07/31/detecting_pirated_and_malicious_android_apps_with_apkid/}},
  month        = jul,
  year         = {2016},
  note         = {Source code: \url{https://github.com/rednaga/APKiD}}
}

% Publication date corrected to agree with the post's own URL path (2018/12);
% the entry previously gave 2014.
@misc{functionsimsearh,
  author       = {Thomas Dullien},
  title        = {Searching statically-linked vulnerable library functions in executable code},
  howpublished = {\url{https://googleprojectzero.blogspot.com/2018/12/searching-statically-linked-vulnerable.html}},
  month        = dec,
  year         = {2018},
  note         = {Source code: \url{https://github.com/thomasdullien/functionsimsearch}}
}

@inproceedings{simidroid2017li,
  abstract = {App updates and repackaging are recurrent in the Android ecosystem, filling markets with similar apps that must be identified and analysed to accelerate user adoption, improve development efforts, and
prevent malware spreading. Despite the existence of several approaches to improve the scalability of detecting repackaged/cloned apps, researchers and practitioners are eventually faced with the need for a comprehensive pairwise comparison to understand and validate the similarities among apps. This paper describes the design of SimiDroid, a framework for multi-level comparison of Android apps. SimiDroid is built with the aim to support the understanding of similarities/changes among app versions and among repackaged apps. In particular, we demonstrate the need and usefulness of such a framework based on different case studies implementing different analysing scenarios for revealing various insights on how repackaged apps are built. We further show that the similarity comparison plugins implemented in SimiDroid yield more accurate results than the state-of-the-art.},
  annotation    = {ZSCC: 0000028},
  author        = {Li, Li and Bissyand{\'e}, Tegawend{\'e} F. and Klein, Jacques},
  booktitle     = {Proc. of the IEEE International Conference On Trust, Security And Privacy In Computing And Communications (TrustCom)},
  date-modified = {2021-10-08 13:08:27 +0200},
  doi           = {10.1109/Trustcom/BigDataSE/ICESS.2017.230},
  file          = {/home/jakob/.zotero/storage/storage/RWL4KNSN/Li et al_2017_SimiDroid.pdf;/home/jakob/.zotero/storage/storage/ZCSNP9YX/8029433.html},
  issn          = {2324-9013},
  keywords      = {Androids,Feature extraction,hasCode,Humanoid robots,Malware,Payloads,Receivers,Tools},
  month         = aug,
  pages         = {136--143},
  shorttitle    = {{{SimiDroid}}},
  title         = {{{SimiDroid}}: {{Identifying}} and {{Explaining Similarities}} in {{Android Apps}}},
  year          = {2017},
  note          = {Source code: \url{https://github.com/lilicoding/SimiDroid}},
  bdsk-url-1    = {https://doi.org/10.1109/Trustcom/BigDataSE/ICESS.2017.230}
}

@inproceedings{astli2019feichtner,
  address   = {{Canterbury, CA, United Kingdom}},
  author    = {Feichtner, Johannes and Rabensteiner, Christof},
  booktitle = {Proc.
of the {{International Conference}} on {{Availability}}, {{Reliability}} and {{Security}} (ARES)},
  date-modified = {2021-10-10 11:48:34 +0200},
  doi           = {10.1145/3339252.3339260},
  isbn          = {978-1-4503-7164-3},
  pages         = {1--10},
  publisher     = {{ACM Press}},
  title         = {{{Obfuscation}}-{{Resilient Code Recognition}} in {{Android Apps}}},
  year          = {2019},
  note          = {Source code: \url{https://github.com/kstudent/astli}},
  bdsk-url-1    = {https://doi.org/10.1145/3339252.3339260}
}

@inproceedings{sootdiff2019dann,
  abstract  = {Different Java compilers and compiler versions, e.g., javac or ecj, produce different bytecode from the same source code. This makes it hard to trace if the bytecode of an open-source library really matches the provided source code. Moreover, it prevents one from detecting which open-source libraries have been re-compiled and rebundled into a single jar, which is a common way to distribute an application. Such rebundling is problematic because it prevents one to check if the jar file contains open-source libraries with known vulnerabilities. To cope with these problems, we propose the tool SootDiff that uses Soot's intermediate representation Jimple, in combination with code clone detection techniques, to reduce dissimilarities introduced by different compilers, and to identify clones. Our results show that SootDiff successfully identifies clones in 102 of 144 cases, whereas bytecode comparison succeeds in 58 cases only.},
  address   = {New York, NY, USA},
  author    = {Dann, Andreas and Hermann, Ben and Bodden, Eric},
  booktitle = {Proc.
of the ACM SIGPLAN International Workshop on State Of the Art in Program Analysis (SOAP)},
  date-modified = {2021-10-10 11:50:47 +0200},
  doi           = {10.1145/3315568.3329966},
  isbn          = {9781450367202},
  keywords      = {Code Clone Detection, Static Analysis, Intermediate Representation},
  location      = {Phoenix, AZ, USA},
  numpages      = {6},
  pages         = {14--19},
  publisher     = {Association for Computing Machinery},
  series        = {SOAP 2019},
  title         = {SootDiff: Bytecode Comparison across Different Java Compilers},
  url           = {https://doi.org/10.1145/3315568.3329966},
  year          = {2019},
  bdsk-url-1    = {https://doi.org/10.1145/3315568.3329966}
}

% The venue of this entry is a conference proceedings, so it is typed as
% inproceedings with the venue in booktitle (previously an article entry
% with the proceedings name stored in the journal field).
@inproceedings{fsquadra2014zhauniarovich,
  abstract      = {The ease of Android applications repackaging and proliferation of application clones in Google Play and other markets call for new effective techniques to detect repackaged code and combat distribution of cloned applications. Today all existing techniques for repackaging detection are based on code similarity or feature (e.g., permission set) similarity evaluation. We propose a new approach to detect repackaging based on the resource files available in application packages. Our tool called FSquaDRA performs a quick pairwise application comparison (full pairwise comparison for 55,000 applications in just 80 hours on a laptop), as it measures how many identical resources are present inside both packages under analysis. The intuition behind our approach is that malicious repackaged applications still need to maintain the ``look and feel'' of the originals by including the same images and other resource files, even though they might have additional code included or some of the original code removed.},
  annotation    = {ZSCC: 0000072},
  author        = {Zhauniarovich, Yury and Gadyatskaya, Olga and Crispo, Bruno},
  date-modified = {2021-10-08 13:12:32 +0200},
  file          = {/home/jakob/.zotero/storage/storage/2KB82F7V/Zhauniarovich et al. - FSquaDRA Fast Detection of Repackaged Application.pdf},
  booktitle     = {Proc.
of the IFIP Annual Conference on Data and Applications Security and Privacy (DBSec)},
  keywords = {hasCode},
  pages    = {16},
  title    = {{{FSquaDRA}}: {{Fast Detection}} of {{Repackaged Applications}}},
  year     = {2014}
}

% Android HIV, IEEE TIFS.
@article{androidhiv2020chen,
  abstract = {Machine learning-based solutions have been successfully employed for the automatic detection of malware on Android. However, machine learning models lack robustness to adversarial examples, which are crafted by adding carefully chosen perturbations to the normal inputs. So far, the adversarial examples can only deceive detectors that rely on syntactic features (e.g., requested permissions, API calls, etc.), and the perturbations can only be implemented by simply modifying application's manifest. While recent Android malware detectors rely more on semantic features from Dalvik bytecode rather than manifest, existing attacking/defending methods are no longer effective. In this paper, we introduce a new attacking method that generates adversarial examples of Android malware and evades being detected by the current models. To this end, we propose a method of applying optimal perturbations onto Android APK that can successfully deceive the machine learning detectors. We develop an automated tool to generate the adversarial examples without human intervention. In contrast to existing works, the adversarial examples crafted by our method can also deceive recent machine learning-based detectors that rely on semantic features such as control-flow-graph. The perturbations can also be implemented directly onto APK's Dalvik bytecode rather than Android manifest to evade from recent detectors. We demonstrate our attack on two state-of-the-art Android malware detection schemes, MaMaDroid and Drebin.
Our results show that the malware detection rates decreased from 96\% to 0\% in MaMaDroid, and from 97\% to 0\% in Drebin, with just a small number of codes to be inserted into the APK.},
  annotation = {ZSCC: NoCitationData[s0]},
  author     = {Chen, X. and Li, C. and Wang, D. and Wen, S. and Zhang, J. and Nepal, S. and Xiang, Y. and Ren, K.},
  doi        = {10.1109/TIFS.2019.2932228},
  file       = {/home/jakob/.zotero/storage/storage/B237WKGL/Chen et al_2020_Android HIV.pdf;/home/jakob/.zotero/storage/storage/QUNAJBVE/8782574.html},
  issn       = {1556-6021},
  journal    = {IEEE Transactions on Information Forensics and Security},
  keywords   = {adversarial machine learning,Android malware detection,Detectors,Feature extraction,Machine learning,Malware,Perturbation methods,Semantics,status.citekeyOK,Tools},
  pages      = {987--1001},
  shorttitle = {Android {{HIV}}},
  title      = {Android {{HIV}}: {{A Study}} of {{Repackaging Malware}} for {{Evading Machine}}-{{Learning Detection}}},
  volume     = {15},
  year       = {2020},
  bdsk-url-1 = {https://doi.org/10.1109/TIFS.2019.2932228}
}

% Literature review and benchmark on repackaged-app detection, IEEE TSE.
@article{rebooting2018li,
  abstract = {Repackaging is a serious threat to the Android ecosystem as it deprives app developers of their benefits, contributes to spreading malware on users' devices, and increases the workload of market maintainers. In the space of six years, the research around this specific issue has produced 57 approaches which do not readily scale to millions of apps or are only evaluated on private datasets without, in general, tool support available to the community. Through a systematic literature review of the subject, we argue that the research is slowing down, where many state-of-the-art approaches have reported high-performance rates on closed datasets, which are unfortunately difficult to replicate and to compare against.
In this work, we propose to reboot the research in repackaged app detection by providing a literature review that summarises the challenges and current solutions for detecting repackaged apps and by providing a large dataset that supports replications of existing solutions and implications of new research directions. We hope that these contributions will re-activate the direction of detecting repackaged apps and spark innovative approaches going beyond the current state-of-the-art.},
  author        = {Li, Li and Bissyand{\'e}, Tegawend{\'e} F. and Klein, Jacques},
  date-modified = {2021-10-08 13:07:25 +0200},
  journal       = {IEEE Transactions on Software Engineering},
  number        = {4},
  shorttitle    = {Rebooting {{Research}} on {{Detecting Repackaged Android Apps}}},
  title         = {Rebooting {{Research}} on {{Detecting Repackaged Android Apps}}: {{Literature Review}} and {{Benchmark}}},
  volume        = {47},
  year          = {2021}
}
% Note on the entry above: the second author's name now carries the middle
% initial, matching the spelling used by the simidroid2017li entry in this file.

@article{survey2019haq,
  annotation    = {haq2019survey\_asobca},
  author        = {Haq, Irfan Ul and Caballero, Juan},
  date-modified = {2021-10-08 12:46:09 +0200},
  journal       = {ACM Computing Surveys},
  keywords      = {citeOK,Computer Science - Cryptography and Security},
  number        = {3},
  title         = {A {{Survey}} of {{Binary Code Similarity}}},
  volume        = {54},
  year          = {2021}
}

@misc{elsim2019,
  abstract      = {Python3 version of the ELSIM Codebase, ported to support Androguard 3.4},
  author        = {{IKARUS Security Software GmbH}},
  date-modified = {2021-10-11 20:26:25 +0200},
  howpublished  = {\url{https://github.com/IKARUSSoftwareSecurity/elsim}},
  title         = {Elsim},
  urldate       = {2021-07-30},
  year          = {2019},
  bdsk-url-1    = {https://github.com/IKARUSSoftwareSecurity/elsim}
}

@inproceedings{optimal2021pang,
  author    = {Pang, Chengbin and Yu, Ruotong and Xu, Dongpeng and Koskinen, Eric and Portokalidis, Georgios and Xu, Jun},
  booktitle = {Proc.
of the IEEE/IFIP International Conference on Dependable Systems and Networks (DSN)},
  date-modified = {2021-10-08 13:10:03 +0200},
  title         = {Towards {{Optimal Use}} of {{Exception Handling Information}} for {{Function Detection}}},
  year          = {2021},
  bdsk-url-1    = {http://arxiv.org/abs/2104.03168}
}

% Black Hat talk; "conference" is an alias of "inproceedings", written here
% in its canonical form.
@inproceedings{androguard_bh,
  author        = {Anthony Desnos and Geoffroy Gueguen},
  title         = {{Android: From Reversing To Decompilation}},
  booktitle     = {Black Hat Abu Dhabi},
  year          = {2011},
  note          = {Source code: \url{https://github.com/androguard/androguard}},
  date-added    = {2015-07-22 15:37:08 +0000},
  date-modified = {2015-07-22 15:37:08 +0000}
}

@misc{site:androguard,
  title         = {Androguard},
  howpublished  = {\url{https://github.com/androguard/androguard}},
  date-added    = {2015-07-22 16:42:05 +0000},
  date-modified = {2015-10-22 22:56:53 +0000}
}

@inproceedings{Crussell:2012:DNADroid,
  author        = {Jonathan Crussell and Clint Gibler and Hao Chen},
  title         = {{Attack of the Clones: Detecting Cloned Applications on Android Markets}},
  booktitle     = {Proc. of the European Symposium on Research in Computer Security (ESORICS)},
  year          = {2012},
  date-added    = {2015-07-22 16:42:48 +0000},
  date-modified = {2021-10-08 13:05:34 +0200}
}

@inproceedings{Crussell:2013:AnDarwin,
  author        = {Jonathan Crussell and Clint Gibler and Hao Chen},
  title         = {{AnDarwin: Scalable Semantics-Based Detection of Similar Android Applications}},
  booktitle     = {Proc. of the European Symposium on Research in Computer Security (ESORICS)},
  year          = {2013},
  date-added    = {2015-07-22 16:42:48 +0000},
  date-modified = {2021-10-08 13:05:26 +0200}
}

@inproceedings{Wang:2015:WuKong,
  author    = {Wang, Haoyu and Guo, Yao and Ma, Ziang and Chen, Xiangqun},
  booktitle = {Proc.
of the International Symposium on Software Testing and Analysis (ISSTA)},
  date-added    = {2015-10-25 14:33:09 +0000},
  date-modified = {2021-10-08 13:10:28 +0200},
  title         = {{WuKong: A Scalable and Accurate Two-phase Approach to Android App Clone Detection}},
  year          = {2015},
  bdsk-url-1    = {http://doi.acm.org/10.1145/2771783.2771795},
  bdsk-url-2    = {http://dx.doi.org/10.1145/2771783.2771795}
}

@inproceedings{Zhang:2014:ViewDroid,
  author        = {Zhang, Fangfang and Huang, Heqing and Zhu, Sencun and Wu, Dinghao and Liu, Peng},
  title         = {{ViewDroid: Towards Obfuscation-Resilient Mobile Application Repackaging Detection}},
  booktitle     = {Proc. of the ACM Conference on Security and Privacy in Wireless \& Mobile Networks (WiSec)},
  year          = {2014},
  date-added    = {2015-10-22 21:44:04 +0000},
  date-modified = {2021-10-08 14:01:34 +0200},
  bdsk-url-1    = {http://doi.acm.org/10.1145/2627393.2627395},
  bdsk-url-2    = {http://dx.doi.org/10.1145/2627393.2627395}
}

% The quoted word in the title uses TeX opening/closing quote ligatures;
% straight double quotes would typeset as two closing quotes.
@inproceedings{piggyapp,
  author        = {Zhou, Wu and Zhou, Yajin and Grace, Michael and Jiang, Xuxian and Zou, Shihong},
  title         = {{Fast, Scalable Detection of ``Piggybacked'' Mobile Applications}},
  booktitle     = {Proc. of the ACM Conference on Data and Application Security and Privacy (CODASPY)},
  year          = {2013},
  date-added    = {2015-07-22 15:37:08 +0000},
  date-modified = {2021-10-08 13:11:22 +0200},
  bdsk-url-1    = {http://doi.acm.org/10.1145/2435349.2435377},
  bdsk-url-2    = {http://dx.doi.org/10.1145/2435349.2435377}
}

@inproceedings{juxtapp,
  author    = {Steve Hanna and Ling Huang and Edward Wu and Saung Li and Charles Chen and Dawn Song},
  booktitle = {Proc.
of the Conference on Detection of Intrusions and Malware \& Vulnerability Assessment (DIMVA)},
  date-added    = {2015-07-22 15:37:08 +0000},
  date-modified = {2021-10-08 13:06:06 +0200},
  title         = {{Juxtapp: A Scalable System for Detecting Code Reuse Among Android Applications}},
  year          = {2012}
}

@inproceedings{droidmoss,
  author        = {Zhou, Wu and Zhou, Yajin and Jiang, Xuxian and Ning, Peng},
  title         = {{Detecting Repackaged Smartphone Applications in Third-Party Android Marketplaces}},
  booktitle     = {Proc. of the ACM Conference on Data and Application Security and Privacy (CODASPY)},
  year          = {2012},
  location      = {San Antonio, Texas, USA},
  numpages      = {10},
  doi           = {10.1145/2133601.2133640},
  isbn          = {978-1-4503-1091-8},
  acmid         = {2133640},
  keywords      = {privacy and security, repackaging, smartphones},
  date-added    = {2015-07-22 16:42:48 +0000},
  date-modified = {2021-10-08 13:11:16 +0200},
  bdsk-url-1    = {http://dx.doi.org/10.1145/2133601.2133640}
}

% Initials of the last author are separated by a space so that name-abbreviating
% styles keep both of them.
@inproceedings{taintart:ccs16,
  author    = {Sun, Mingshen and Wei, Tao and Lui, John C. S.},
  booktitle = {Proc.
of the ACM SIGSAC Conference on Computer and Communications Security (CCS)},
  title = {TaintART: A Practical Multi-Level Information-Flow Tracking System for Android RunTime},
  year  = {2016}
}

% Page range written with a double hyphen (en dash).
@article{taintman:2020tdsc,
  author     = {You, Wei and Liang, Bin and Shi, Wenchang and Wang, Peng and Zhang, Xiangyu},
  title      = {TaintMan: An ART-Compatible Dynamic Taint Analysis Framework on Unmodified and Non-Rooted Android Devices},
  journal    = {IEEE Transactions on Dependable and Secure Computing},
  volume     = {17},
  number     = {1},
  pages      = {209--222},
  year       = {2020},
  doi        = {10.1109/TDSC.2017.2740169},
  bdsk-url-1 = {https://doi.org/10.1109/TDSC.2017.2740169}
}

% Page range written with a double hyphen; initials of the last author are
% separated by a space.
@article{ndroid:2019tifs,
  author     = {Xue, Lei and Qian, Chenxiong and Zhou, Hao and Luo, Xiapu and Zhou, Yajin and Shao, Yuru and Chan, Alvin T. S.},
  title      = {NDroid: Toward Tracking Information Flows Across Multiple Android Contexts},
  journal    = {IEEE Transactions on Information Forensics and Security},
  volume     = {14},
  number     = {3},
  pages      = {814--828},
  year       = {2019},
  doi        = {10.1109/TIFS.2018.2866347},
  bdsk-url-1 = {https://doi.org/10.1109/TIFS.2018.2866347}
}

@inproceedings{tiro:18usenix,
  author    = {Michelle Y. Wong and David Lie},
  title     = {Tackling Runtime-based obfuscation in Android with {TIRO}},
  booktitle = {Proc. of the USENIX Security Symposium},
  year      = {2018}
}

@inproceedings{graux:hal-02877815,
  author    = {Graux, Pierre and Lalande, Jean-Fran{\c c}ois and Wilke, Pierre and Viet Triem Tong, Val{\'e}rie},
  title     = {{Abusing Android Runtime for Application Obfuscation}},
  booktitle = {Proc. of the Workshop on Software Attacks and Defenses (SAD)},
  year      = {2020}
}

@inproceedings{happer:21sp,
  author    = {Xue, Lei and Zhou, Hao and Luo, Xiapu and Zhou, Yajin and Shi, Yang and Gu, Guofei and Zhang, Fengwei and Au, Man Ho},
  booktitle = {Proc.
of the IEEE Symposium on Security and Privacy (S\&P)},
  title = {Happer: Unpacking Android Apps via a Hardware-Assisted Approach},
  year  = {2021}
}

@inproceedings{parema:21issta,
  author     = {Xue, Lei and Yan, Yuxiao and Yan, Luyi and Jiang, Muhui and Luo, Xiapu and Wu, Dinghao and Zhou, Yajin},
  title      = {Parema: An Unpacking Framework for Demystifying VM-Based Android Packers},
  booktitle  = {Proc. of the ACM SIGSOFT International Symposium on Software Testing and Analysis (ISSTA)},
  pages      = {152--164},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  year       = {2021},
  doi        = {10.1145/3460319.3464839},
  isbn       = {9781450384599},
  bdsk-url-1 = {https://doi.org/10.1145/3460319.3464839}
}

@inproceedings{malton:21sec,
  author    = {Lei Xue and Yajin Zhou and Ting Chen and Xiapu Luo and Guofei Gu},
  title     = {Malton: Towards On-Device Non-Invasive Mobile Malware Analysis for {ART}},
  booktitle = {Proc. of the USENIX Security Symposium},
  pages     = {289--306},
  address   = {Vancouver, BC},
  year      = {2017},
  isbn      = {978-1-931971-40-9}
}

% Page range written with a double hyphen (en dash).
@inproceedings{dexlego18dsn,
  author     = {Ning, Zhenyu and Zhang, Fengwei},
  title      = {DexLego: Reassembleable Bytecode Extraction for Aiding Static Analysis},
  booktitle  = {Proc. of the Annual IEEE/IFIP International Conference on Dependable Systems and Networks (DSN)},
  pages      = {690--701},
  year       = {2018},
  doi        = {10.1109/DSN.2018.00075},
  bdsk-url-1 = {https://doi.org/10.1109/DSN.2018.00075}
}

@inproceedings{CompARTist17ccs,
  address   = {New York, NY, USA},
  author    = {Huang, Jie and Schranz, Oliver and Bugiel, Sven and Backes, Michael},
  booktitle = {Proc.
of the ACM SIGSAC Conference on Computer and Communications Security (CCS)},
  isbn      = {9781450349468},
  pages     = {1037--1049},
  publisher = {Association for Computing Machinery},
  title     = {The ART of App Compartmentalization: Compiler-Based Library Privilege Separation on Stock Android},
  year      = {2017}
}

% The three entries below had single-hyphen page ranges; they now use the
% double hyphen (en dash).
@inproceedings{artist16ares,
  author     = {Dresel, Lukas and Protsenko, Mykolai and M{\"u}ller, Tilo},
  title      = {ARTIST: The Android Runtime Instrumentation Toolkit},
  booktitle  = {Proc. of the International Conference on Availability, Reliability and Security (ARES)},
  pages      = {107--116},
  year       = {2016},
  doi        = {10.1109/ARES.2016.80},
  bdsk-url-1 = {https://doi.org/10.1109/ARES.2016.80}
}

@inproceedings{artist17eurosp,
  author     = {Backes, Michael and Bugiel, Sven and Schranz, Oliver and Von Styp-Rekowsky, Philipp and Weisgerber, Sebastian},
  title      = {ARTist: The Android Runtime Instrumentation and Security Toolkit},
  booktitle  = {Proc. of the IEEE European Symposium on Security and Privacy (EuroS\&P)},
  pages      = {481--495},
  year       = {2017},
  doi        = {10.1109/EuroSP.2017.43},
  bdsk-url-1 = {https://doi.org/10.1109/EuroSP.2017.43}
}

@inproceedings{libupdates19eurosp,
  author     = {Huang, Jie and Borges, Nataniel and Bugiel, Sven and Backes, Michael},
  title      = {Up-To-Crash: Evaluating Third-Party Library Updatability on Android},
  booktitle  = {Proc. of the IEEE European Symposium on Security and Privacy (EuroS\&P)},
  pages      = {15--30},
  year       = {2019},
  doi        = {10.1109/EuroSP.2019.00012},
  bdsk-url-1 = {https://doi.org/10.1109/EuroSP.2019.00012}
}

@inproceedings{preinstalled20sp,
  author    = {Gamba, Julien and Rashed, Mohammed and Razaghpanah, Abbas and Tapiador, Juan and Vallina-Rodriguez, Narseo},
  booktitle = {Proc.
of the IEEE Symposium on Security and Privacy (S\&P)},
  doi        = {10.1109/SP40000.2020.00013},
  pages      = {1039--1055},
  title      = {An Analysis of Pre-installed Android Software},
  year       = {2020},
  bdsk-url-1 = {https://doi.org/10.1109/SP40000.2020.00013}
}

% Page range written with a double hyphen (en dash).
@inproceedings{librarian21icse,
  author     = {Almanee, Sumaya and {\"U}nal, Arda and Payer, Mathias and Garcia, Joshua},
  title      = {Too Quiet in the Library: An Empirical Study of Security Updates in Android Apps' Native Code},
  booktitle  = {Proc. of the IEEE/ACM International Conference on Software Engineering (ICSE)},
  pages      = {1347--1359},
  year       = {2021},
  doi        = {10.1109/ICSE43902.2021.00122},
  bdsk-url-1 = {https://doi.org/10.1109/ICSE43902.2021.00122}
}

@inproceedings{beyondplay18imc,
  author     = {Wang, Haoyu and Liu, Zhe and Liang, Jingyue and Vallina-Rodriguez, Narseo and Guo, Yao and Li, Li and Tapiador, Juan and Cao, Jingcun and Xu, Guoai},
  title      = {Beyond Google Play: A Large-Scale Comparative Study of Chinese Android App Markets},
  booktitle  = {Proc. of the Internet Measurement Conference (IMC)},
  pages      = {293--307},
  numpages   = {15},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  year       = {2018},
  doi        = {10.1145/3278532.3278558},
  isbn       = {9781450356190},
  url        = {https://doi.org/10.1145/3278532.3278558},
  bdsk-url-1 = {https://doi.org/10.1145/3278532.3278558}
}

@inproceedings{ishi17markets,
  author    = {Yuta Ishii and Takuya Watanabe and Fumihiro Kanei and Yuta Takata and Eitaro Shioji and Mitsuaki Akiyama and Takeshi Yagi and Bo Sun and Tatsuya Mori},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl    = {https://dblp.org/rec/conf/sigsoft/IshiiWKTSAYSM17.bib},
  booktitle = {Proc.
of the {ACM} {SIGSOFT} International Workshop on App Market Analytics (WAMA@ESEC/SIGSOFT FSE)},
  doi        = {10.1145/3121264.3121267},
  pages      = {12--18},
  publisher  = {{ACM}},
  timestamp  = {Sun, 25 Jul 2021 11:53:43 +0200},
  title      = {Understanding the Security Management of Global Third-Party Android Marketplaces},
  url        = {https://doi.org/10.1145/3121264.3121267},
  year       = {2017},
  bdsk-url-1 = {https://doi.org/10.1145/3121264.3121267}
}

@misc{bindiff,
  author        = {{Zynamics}},
  title         = {{BinDiff}},
  howpublished  = {\url{https://www.zynamics.com/bindiff.html}},
  date          = {2021},
  urldate       = {2021-06-25},
  date-modified = {2021-10-11 19:55:34 +0200},
  bdsk-url-1    = {https://www.zynamics.com/bindiff.html}
}

@misc{diaphora,
  author        = {Joxean Koret},
  title         = {{Diaphora}},
  howpublished  = {\url{http://diaphora.re}},
  note          = {Source code: \url{https://github.com/joxeankoret/diaphora}},
  date          = {2019},
  urldate       = {2021-11-30},
  date-modified = {2021-10-11 19:55:34 +0200}
}

@article{dullien2005graph,
  author        = {Dullien, Thomas and Rolles, Rolf},
  title         = {Graph-based Comparison of Executable Objects},
  journal       = {Symposium sur la s{\'e}curit{\'e} des technologies de l'information et des communications},
  volume        = {5},
  number        = {1},
  pages         = {3},
  year          = {2005},
  date-modified = {2021-10-11 22:33:15 +0200}
}

% Empty volume/number fields removed, page range written with a double hyphen,
% and accented letters written as LaTeX escapes like the other entries in
% this file.
@inproceedings{fotaapps2021,
  author    = {Bl{\'a}zquez, Eduardo and Pastrana, Sergio and Feal, {\'A}lvaro and Gamba, Julien and Kotzias, Platon and Vallina-Rodriguez, Narseo and Tapiador, Juan},
  title     = {Trouble Over-The-Air: An Analysis of FOTA Apps in the Android Ecosystem},
  booktitle = {Proc. of the IEEE Symposium on Security and Privacy (S\&P)},
  pages     = {1606--1622},
  year      = {2021},
  doi       = {10.1109/SP40001.2021.00095}
}

@inproceedings{firmscope2021,
  author    = {Mohamed Elsabagh and Ryan Johnson and Angelos Stavrou and Chaoshun Zuo and Qingchuan Zhao and Zhiqiang Lin},
  title     = {{FIRMSCOPE}: Automatic Uncovering of Privilege-Escalation Vulnerabilities in Pre-Installed Apps in Android Firmware},
  booktitle = {Proc.
of the {USENIX} Security Symposium},
  year      = {2020},
  isbn      = {978-1-939133-17-5},
  pages     = {2379--2396},
  url       = {https://www.usenix.org/conference/usenixsecurity20/presentation/elsabagh},
  publisher = {{USENIX} Association},
  month     = aug,
}

% Black Hat talk; "conference" is an alias of "inproceedings", written here
% in its canonical form.
@inproceedings{stone:preinstalledbh,
  author    = {Maddie Stone},
  title     = {{Securing the System: A Deep Dive into Reversing Android Pre-Installed Apps}},
  booktitle = {Black Hat USA},
  year      = {2019}
}

% Record exported from dblp (see biburl).
@inproceedings{DBLP:conf/esorics/ZhangAYD16,
  author    = {Xiao Zhang and Yousra Aafer and Kailiang Ying and Wenliang Du},
  title     = {Hey, You, Get Off of My Image: Detecting Data Residue in Android Images},
  booktitle = {Proc. of the European Symposium on Research in Computer Security (ESORICS)},
  series    = {Lecture Notes in Computer Science},
  volume    = {9878},
  pages     = {401--421},
  publisher = {Springer},
  year      = {2016},
  doi       = {10.1007/978-3-319-45744-4\_20},
  url       = {https://doi.org/10.1007/978-3-319-45744-4\_20},
  timestamp = {Tue, 14 May 2019 10:00:53 +0200},
  biburl    = {https://dblp.org/rec/conf/esorics/ZhangAYD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{xu:www19,
  author    = {Xu, Mengwei and Liu, Jiawei and Liu, Yuanqiang and Lin, Felix Xiaozhu and Liu, Yunxin and Liu, Xuanzhe},
  title     = {A First Look at Deep Learning Apps on Smartphones},
  year      = {2019},
  isbn      = {9781450366748},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3308558.3313591},
  url       = {https://doi.org/10.1145/3308558.3313591},
  booktitle = {Proc.
of the World Wide Web Conference (WWW)}
}

% F-Droid repositories, cited as web resources with their access dates.
@misc{fdroiddata,
  author       = {{F-Droid}},
  title        = {{F-Droid} Data Repository},
  howpublished = {\url{https://gitlab.com/fdroid/fdroiddata}},
  date         = {2021},
  urldate      = {2021-11-30},
  bdsk-url-1   = {https://gitlab.com/fdroid/fdroiddata}
}

@misc{fdroidserver,
  author       = {{F-Droid}},
  title        = {{F-Droid} Server Tools Repository},
  howpublished = {\url{https://gitlab.com/fdroid/fdroidserver/}},
  date         = {2021},
  urldate      = {2021-11-30},
  bdsk-url-1   = {https://gitlab.com/fdroid/fdroidserver/}
}