2024
Kyriakou, Kyriakos; Otterbacher, Jahna
Modular Oversight Methodology: A framework to aid ethical alignment of algorithmic creations Journal Article Forthcoming
In: Design Science Journal, Forthcoming.
@article{Kyriakou2024mom,
title = {Modular Oversight Methodology: A framework to aid ethical alignment of algorithmic creations},
author = {Kyriakos Kyriakou and Jahna Otterbacher},
year = {2024},
date = {2024-07-01},
journal = {Design Science Journal},
keywords = {},
pubstate = {forthcoming},
tppubtype = {article}
}
2023
Kyriakou, Kyriakos; Otterbacher, Jahna
In humans, we trust: Multidisciplinary perspectives on the requirements for human oversight in algorithmic processes Journal Article
In: Discover Artificial Intelligence, vol. 3, no. 1, 2023, ISSN: 2731-0809.
@article{Kyriakou2023,
title = {In humans, we trust: Multidisciplinary perspectives on the requirements for human oversight in algorithmic processes},
author = {Kyriakos Kyriakou and Jahna Otterbacher},
doi = {10.1007/s44163-023-00092-2},
issn = {2731-0809},
year = {2023},
date = {2023-12},
urldate = {2023-12},
journal = {Discover Artificial Intelligence},
volume = {3},
number = {1},
publisher = {Springer Science and Business Media LLC},
abstract = {Algorithms have greatly advanced and become integrated into our everyday lives. Although they support humans in daily functions, they often exhibit unwanted behaviors perpetuating social stereotypes, discrimination, and other forms of biases. Regardless of their accuracy on task, many algorithms do not get scrutinized for unintended behaviors in a systematic way. This phenomenon can propagate and amplify existing societal issues or even create new ones. Many have called for human supervision (human oversight) of algorithmic processes. Oversight is often presented as a way of monitoring algorithmic behavior, as to then address identified issues, by initiating a fix or even correcting the final decision. Unfortunately, a common consensus is missing in the scientific community as to what all human oversight entails. Most importantly, the requirements for a successful application of a human oversight process are only vaguely defined. To address this, we present a critical synthesis of five key articles from different domains, which discuss requirements for human oversight. We use the concept of the Society-in-the-Loop (SITL) [1] as the baseline for understanding and mapping these requirements. In addition, we comment on the requirements and the overall multidisciplinary trend around the topic. Then, we present the concept of a Modular Oversight Methodology (MOM) following the SITL viewpoint, by also considering the requirements identified from the selected literature. Finally, we present a set of suggestions and future work required for a successful application of a human oversight process in a SITL approach.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2022
Barlas, Pınar; Krahn, Maximilian; Kleanthous, Styliani; Kyriakou, Kyriakos; Otterbacher, Jahna
Shifting Our Awareness, Taking Back Tags: Temporal Changes in Computer Vision Services’ Social Behaviors Proceedings Article
In: Proceedings of the International AAAI Conference on Web and Social Media, vol. 16, no. 1, pp. 22–31, 2022.
@inproceedings{Barlas_Krahn_Kleanthous_Kyriakou_Otterbacher_2022,
title = {Shifting Our Awareness, Taking Back Tags: Temporal Changes in Computer Vision Services’ Social Behaviors},
author = {Pınar Barlas and Maximilian Krahn and Styliani Kleanthous and Kyriakos Kyriakou and Jahna Otterbacher},
url = {https://ojs.aaai.org/index.php/ICWSM/article/view/19269},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
volume = {16},
number = {1},
pages = {22--31},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2021
Kyriakou, Kyriakos; Barlas, Pınar; Kleanthous, Styliani; Christoforou, Evgenia; Otterbacher, Jahna
Crowdsourcing Human Oversight on Image Tagging Algorithms: An initial study of image diversity Conference
Work in Progress (WiP) Paper of the 2021 AAAI Conference on Human Computation and Crowdsourcing (HCOMP'21), November 14–18, 2021, Virtual, 2021.
@conference{Kyriakou2021wip,
title = {Crowdsourcing Human Oversight on Image Tagging Algorithms: An initial study of image diversity},
author = {Kyriakos Kyriakou and Pınar Barlas and Styliani Kleanthous and Evgenia Christoforou and Jahna Otterbacher},
url = {https://www.humancomputation.com},
year = {2021},
date = {2021-11-14},
urldate = {2021-11-14},
booktitle = {Work in Progress (WiP) Paper of the 2021 AAAI Conference on Human Computation and Crowdsourcing (HCOMP'21), November 14–18, 2021, Virtual},
abstract = {Various stakeholders have called for human oversight of algorithmic processes, as a means to mitigate the possibility for automated discrimination and other social harms. This is even more crucial in light of the democratization of AI, where data and algorithms, such as Cognitive Services, are deployed into various applications and socio-cultural contexts. Inspired by previous work proposing human-in-the-loop governance mechanisms, we run a feasibility study involving image tagging services. Specifically, we ask whether micro-task crowd-sourcing can be an effective means for collecting a diverse pool of data for evaluating fairness in a hypothetical scenario of analyzing professional profile photos in a later phase. In this work-in-progress paper, we present our proposed oversight approach and framework for analyzing the diversity of the images provided. Given the subjectivity of fairness judgments, we first aimed to recruit a diverse crowd from three distinct regions. This study lays the groundwork for expanding the approach, to offer developers a means to evaluate Cognitive Services before and/or during deployment.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Barlas, Pınar; Kyriakou, Kyriakos; Kleanthous, Styliani; Otterbacher, Jahna
Person, Human, Neither: The Dehumanization Potential of Automated Image Tagging Proceedings Article
In: Proceedings of the 2021 AAAI/ACM Conference on AI, Ethics, and Society (AIES '21), May 19–21, 2021, Virtual Event, USA, Association for Computing Machinery, 2021, ISBN: 9781450384735.
@inproceedings{Pinar2021dehumanization,
title = {Person, Human, Neither: The Dehumanization Potential of Automated Image Tagging},
author = {Pınar Barlas and Kyriakos Kyriakou and Styliani Kleanthous and Jahna Otterbacher},
doi = {10.1145/3461702.3462567},
isbn = {9781450384735},
year = {2021},
date = {2021-05-24},
urldate = {2021-05-24},
booktitle = {Proceedings of the 2021 AAAI/ACM Conference on AI, Ethics, and Society (AIES '21), May 19–21, 2021, Virtual Event, USA},
volume = {1},
number = {1},
publisher = {Association for Computing Machinery},
abstract = {Following the literature on dehumanization via technology, we audit six proprietary image tagging algorithms (ITAs) for their potential to perpetuate dehumanization. We examine the ITAs’ outputs on a controlled dataset of images depicting a diverse group of people for tags that indicate the presence of a human in the image. Through an analysis of the (mis)use of these tags, we find that there are some individuals whose ‘humanness’ is not recognized by an ITA, and that these individuals are often from marginalized social groups. Finally, we compare these findings with the use of the ‘face’ tag, which can be used for surveillance, revealing that people’s faces are often recognized by an ITA even when their ‘humanness’ is not. Overall, we highlight the subtle ways in which ITAs may inflict widespread, disparate harm, and emphasize the importance of considering the social context of the resulting application.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barlas, Pınar; Kyriakou, Kyriakos; Guest, Olivia; Kleanthous, Styliani; Otterbacher, Jahna
To "See" is to Stereotype Proceedings Article
In: Proceedings of the ACM on Human-Computer Interaction, vol. 4, no. CSCW3, pp. 1–31, 2021, ISSN: 2573-0142.
@article{Barlas2021,
title = {To "See" is to Stereotype},
author = {Pınar Barlas and Kyriakos Kyriakou and Olivia Guest and Styliani Kleanthous and Jahna Otterbacher},
url = {https://dl.acm.org/doi/10.1145/3432931},
doi = {10.1145/3432931},
issn = {2573-0142},
year = {2021},
date = {2021-01-01},
journal = {Proceedings of the ACM on Human-Computer Interaction},
volume = {4},
number = {CSCW3},
pages = {1--31},
abstract = {Machine-learned computer vision algorithms for tagging images are increasingly used by developers and researchers, having become popularized as easy-to-use "cognitive services." Yet these tools struggle with gender recognition, particularly when processing images of women, people of color and non-binary individuals. Socio-technical researchers have cited data bias as a key problem; training datasets often over-represent images of people and contexts that convey social stereotypes. The social psychology literature explains that people learn social stereotypes, in part, by observing others in particular roles and contexts, and can inadvertently learn to associate gender with scenes, occupations and activities. Thus, we study the extent to which image tagging algorithms mimic this phenomenon. We design a controlled experiment, to examine the interdependence between algorithmic recognition of context and the depicted person's gender. In the spirit of auditing to understand machine behaviors, we create a highly controlled dataset of people images, imposed on gender-stereotyped backgrounds. Our methodology is reproducible and our code publicly available. Evaluating five proprietary algorithms, we find that in three, gender inference is hindered when a background is introduced. Of the two that "see" both backgrounds and gender, it is the one whose output is most consistent with human stereotyping processes that is superior in recognizing gender. We discuss the accuracy--fairness trade-off, as well as the importance of auditing black boxes in better understanding this double-edged sword.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2020
Barlas, Pınar; Kyriakou, Kyriakos; Chrysanthou, Antrea; Kleanthous, Styliani; Otterbacher, Jahna
OPIAS: Over-Personalization in Information Access Systems Proceedings Article
In: Adjunct Publication of the 28th ACM Conference on User Modeling, Adaptation and Personalization, pp. 103–104, ACM, New York, NY, USA, 2020, ISBN: 9781450379502.
@inproceedings{Barlas2020,
title = {OPIAS: Over-Personalization in Information Access Systems},
author = {Pınar Barlas and Kyriakos Kyriakou and Antrea Chrysanthou and Styliani Kleanthous and Jahna Otterbacher},
url = {https://dl.acm.org/doi/10.1145/3386392.3397607},
doi = {10.1145/3386392.3397607},
isbn = {9781450379502},
year = {2020},
date = {2020-07-01},
booktitle = {Adjunct Publication of the 28th ACM Conference on User Modeling, Adaptation and Personalization},
pages = {103--104},
publisher = {ACM},
address = {New York, NY, USA},
abstract = {"Filter bubbles," a phenomenon in which users become caught in an information space with low diversity, can have various negative effects. Several tools have been created to monitor the users' actions to make them aware of their own filter bubbles, but these tools have disadvantages (e.g., infringement on privacy). We propose a standalone demo that does not require any personal data. It emulates Facebook, a well-known and popular social network. We demonstrate how each user interaction may affect the selection of subsequent posts, sometimes resulting in the creation of a 'filter bubble.' The administrator (researcher) can tailor the demo for any context, changing the topics and points of view used in the demo. Data collection via surveys before and after the demo is facilitated so that the demo can be used for research, in addition to education.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Kyriakou, Kyriakos; Kleanthous, Styliani; Otterbacher, Jahna; Papadopoulos, George A.
Emotion-based Stereotypes in Image Analysis Services Proceedings Article
In: Adjunct Publication of the 28th ACM Conference on User Modeling, Adaptation and Personalization, pp. 252–259, ACM, New York, NY, USA, 2020, ISBN: 9781450379502.
@inproceedings{Kyriakou2020a,
title = {Emotion-based Stereotypes in Image Analysis Services},
author = {Kyriakos Kyriakou and Styliani Kleanthous and Jahna Otterbacher and George A. Papadopoulos},
url = {https://dl.acm.org/doi/10.1145/3386392.3399567},
doi = {10.1145/3386392.3399567},
isbn = {9781450379502},
year = {2020},
date = {2020-07-01},
booktitle = {Adjunct Publication of the 28th ACM Conference on User Modeling, Adaptation and Personalization},
pages = {252--259},
publisher = {ACM},
address = {New York, NY, USA},
abstract = {Vision-based cognitive services (CogS) have become crucial in a wide range of applications, from real-time security and social networks to smartphone applications. Many services focus on analyzing people images. When it comes to facial analysis, these services can be misleading or even inaccurate, raising ethical concerns such as the amplification of social stereotypes. We analyzed popular Image Tagging CogS that infer emotion from a person's face, considering whether they perpetuate racial and gender stereotypes concerning emotion. By comparing both CogS and Human-generated descriptions on a set of controlled images, we highlight the need for transparency and fairness in CogS. In particular, we document evidence that CogS may actually be more likely than crowdworkers to perpetuate the stereotype of the "angry black man" and often attribute black race individuals with "emotions of hostility".},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Chrysanthou, Antrea; Barlas, Pınar; Kyriakou, Kyriakos; Kleanthous, Styliani; Otterbacher, Jahna
Bursting the Bubble: Tool for Awareness and Research about Overpersonalization in Information Access Systems Proceedings Article
In: Proceedings of the 25th International Conference on Intelligent User Interfaces Companion, pp. 112–113, ACM, New York, NY, USA, 2020, ISBN: 9781450375139.
@inproceedings{Chrysanthou2020,
title = {Bursting the Bubble: Tool for Awareness and Research about Overpersonalization in Information Access Systems},
author = {Antrea Chrysanthou and Pınar Barlas and Kyriakos Kyriakou and Styliani Kleanthous and Jahna Otterbacher},
url = {https://dl.acm.org/doi/10.1145/3379336.3381863},
doi = {10.1145/3379336.3381863},
isbn = {9781450375139},
year = {2020},
date = {2020-03-01},
booktitle = {Proceedings of the 25th International Conference on Intelligent User Interfaces Companion},
pages = {112--113},
publisher = {ACM},
address = {New York, NY, USA},
abstract = {Modern information access systems extensively use personalization, automatically filtering and/or ranking content based on the user profile, to guide users to the most relevant material. However, this can also lead to unwanted effects such as the "filter bubble." We present an interactive demonstration system, designed as an educational and research tool, which imitates a search engine, personalizing the search results returned for a query based on the user's characteristics. The system can be tailored to suit any type of audience and context, as well as enabling the collection of responses and interaction data.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Kyriakou, Kyriakos; Barlas, Pınar; Kleanthous, Styliani; Otterbacher, Jahna
OpenTag: Understanding Human Perceptions of Image Tagging Algorithms Proceedings Article
In: Proceedings of the 8th AAAI Conference on Human Computation and Crowdsourcing, Hilversum, The Netherlands, 2020.
@inproceedings{Kyriakou2020,
title = {OpenTag: Understanding Human Perceptions of Image Tagging Algorithms},
author = {Kyriakos Kyriakou and Pınar Barlas and Styliani Kleanthous and Jahna Otterbacher},
url = {https://www.aaai.org},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 8th AAAI Conference on Human Computation and Crowdsourcing},
address = {Hilversum, The Netherlands},
abstract = {Image Tagging Algorithms (ITAs) are extensively used in our information ecosystem, from facilitating the retrieval of images in social platforms to learning about users and their preferences. However, audits performed on ITAs have demonstrated that their behaviors often exhibit social biases, especially when analyzing images depicting people. We present OpenTag, a platform that fuses the auditing process with a crowdsourcing approach. Users can upload an image, which is then analyzed by various ITAs, resulting in multiple sets of descriptive tags. With OpenTag, the user can observe and compare the output of multiple ITAs simultaneously, while researchers can study the manner in which users perceive this output. Finally, using the collected data, further audits can be performed on ITAs.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2019
Barlas, Pınar; Kleanthous, Styliani; Kyriakou, Kyriakos; Otterbacher, Jahna
What Makes an Image Tagger Fair? Proceedings Article
In: Proceedings of the 27th ACM Conference on User Modeling, Adaptation and Personalization, pp. 95–103, ACM, New York, NY, USA, 2019, ISBN: 9781450360210.
@inproceedings{Barlas2019a,
title = {What Makes an Image Tagger Fair?},
author = {Pınar Barlas and Styliani Kleanthous and Kyriakos Kyriakou and Jahna Otterbacher},
url = {https://dl.acm.org/doi/10.1145/3320435.3320442},
doi = {10.1145/3320435.3320442},
isbn = {9781450360210},
year = {2019},
date = {2019-06-01},
booktitle = {Proceedings of the 27th ACM Conference on User Modeling, Adaptation and Personalization},
pages = {95--103},
publisher = {ACM},
address = {New York, NY, USA},
abstract = {Image analysis algorithms have been a boon to personalization in digital systems and are now widely available via easy-to-use APIs. However, it is important to ensure that they behave fairly in applications that involve processing images of people, such as dating apps. We conduct an experiment to shed light on the factors influencing the perception of "fairness." Participants are shown a photo along with two descriptions (human- and algorithm-generated). They are then asked to indicate which is "more fair" in the context of a dating site, and explain their reasoning. We vary a number of factors, including the gender, race and attractiveness of the person in the photo. While participants generally found human-generated tags to be more fair, API tags were judged as being more fair in one setting - where the image depicted an "attractive," white individual. In their explanations, participants often mention accuracy, as well as the objectivity/subjectivity of the tags in the description. We relate our work to the ongoing conversation about fairness in opaque tools like image tagging APIs, and their potential to result in harm.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Otterbacher, Jahna; Barlas, Pınar; Kleanthous, Styliani; Kyriakou, Kyriakos
How Do We Talk about Other People? Group (Un)Fairness in Natural Language Image Descriptions Proceedings Article
In: Proceedings of the Seventh AAAI Conference on Human Computation and Crowdsourcing (HCOMP-19), vol. 7, no. 1, pp. 19, AAAI, 2019.
@inproceedings{Otterbacher2019,
title = {How Do We Talk about Other People? Group (Un)Fairness in Natural Language Image Descriptions},
author = {Jahna Otterbacher and Pınar Barlas and Styliani Kleanthous and Kyriakos Kyriakou},
year = {2019},
date = {2019-01-01},
booktitle = {Proceedings of the Seventh AAAI Conference on Human Computation and Crowdsourcing (HCOMP-19)},
volume = {7},
number = {1},
pages = {19},
publisher = {AAAI},
abstract = {Crowdsourcing plays a key role in developing algorithms for image recognition or captioning. Major datasets, such as MS COCO or Flickr30K, have been built by eliciting natural language descriptions of images from workers. Yet such elicitation tasks are susceptible to human biases, including stereotyping people depicted in images. Given the growing concerns surrounding discrimination in algorithms, as well as in the data used to train them, it is necessary to take a critical look at this practice. We conduct experiments at Figure Eight using a controlled set of people images. Men and women of various races are positioned in the same manner, wearing a grey t-shirt. We prompt workers for 10 descriptive labels, and consider them using the human-centric approach, which assumes reporting bias. We find that "what's worth saying" about these uniform images often differs as a function of the gender and race of the depicted person, violating the notion of group fairness. Although this diversity in natural language people descriptions is expected and often beneficial, it could result in automated disparate impact if not managed properly.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Barlas, Pınar; Kyriakou, Kyriakos; Kleanthous, Styliani; Otterbacher, Jahna
Social B(eye)as: Human and machine descriptions of people images Proceedings Article
In: Proceedings of the 13th International Conference on Web and Social Media, ICWSM 2019, pp. 583–591, 2019.
@inproceedings{Barlas2019,
title = {Social B(eye)as: Human and machine descriptions of people images},
author = {Pınar Barlas and Kyriakos Kyriakou and Styliani Kleanthous and Jahna Otterbacher},
url = {https://doi.org/10.7910/DVN/APZKSS},
doi = {10.7910/DVN/APZKSS},
year = {2019},
date = {2019-01-01},
booktitle = {Proceedings of the 13th International Conference on Web and Social Media, ICWSM 2019},
pages = {583--591},
abstract = {Image analysis algorithms have become an indispensable tool in our information ecosystem, facilitating new forms of visual communication and information sharing. At the same time, they enable large-scale socio-technical research which would otherwise be difficult to carry out. However, their outputs may exhibit social bias, especially when analyzing people images. Since most algorithms are proprietary and opaque, we propose a method of auditing their outputs for social biases. To be able to compare how algorithms interpret a controlled set of people images, we collected descriptions across six image tagging algorithms. In order to compare these results to human behavior, we also collected descriptions on the same images from crowdworkers in two anglophone regions. The dataset we present consists of tags from these eight taggers, along with a typology of concepts, and a python script to calculate vector scores for each image and tagger. Using our methodology, researchers can see the behaviors of the image tagging algorithms and compare them to those of crowdworkers. Beyond computer vision auditing, the dataset of human- and machine-produced tags, the typology, and the vectorization method can be used to explore a range of research questions related to both algorithmic and human behaviors.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Kyriakou, Kyriakos; Barlas, Pınar; Kleanthous, Styliani; Otterbacher, Jahna
Fairness in proprietary image tagging algorithms: A cross-platform audit on people images Proceedings Article
In: Proceedings of the 13th International Conference on Web and Social Media, ICWSM 2019, pp. 313–322, 2019.
@inproceedings{Kyriakou2018,
title = {Fairness in proprietary image tagging algorithms: A cross-platform audit on people images},
author = {Kyriakos Kyriakou and Pınar Barlas and Styliani Kleanthous and Jahna Otterbacher},
year = {2019},
date = {2019-01-01},
booktitle = {Proceedings of the 13th International Conference on Web and Social Media, ICWSM 2019},
pages = {313--322},
abstract = {There are increasing expectations that algorithms should behave in a manner that is socially just. We consider the case of image tagging APIs and their interpretations of people images. Image taggers have become indispensable in our information ecosystem, facilitating new modes of visual communication and sharing. Recently, they have become widely available as Cognitive Services. But while tagging APIs offer developers an inexpensive and convenient means to add functionality to their creations, most are opaque and proprietary. Through a cross-platform comparison of six taggers, we show that behaviors differ significantly. While some offer more interpretation on images, they may exhibit less fairness toward the depicted persons, by misuse of gender-related tags and/or making judgments on a person's physical appearance. We also discuss the difficulties of studying fairness in situations where algorithmic systems cannot be benchmarked against a ground truth.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}