ValueMonitor – Use an existing topic model¶
This page is a visualisation of the ValueMonitor prototype. To run the notebook yourself, click the ‘Run in Google Colab’ icon below:
1. Import dataset and packages ¶
In this step, the dataset and relevant Python packages are imported.
In [4]:
''' Packages'''
!pip install corextopic
!pip install joblib
!pip install tabulate
!pip install simple_colors
import os, sys, importlib
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import pickle
''' Source code'''
user = "tristandewildt"
repo = "ValueMonitor_Prototype"
src_dir = "code"
pyfile_1 = "make_topic_model.py"
pyfile_2 = "create_visualisation.py"
if os.path.isdir(repo):
!rm -rf {repo}
!git clone https://github.com/{user}/{repo}.git
path = f"{repo}/{src_dir}"
if not path in sys.path:
sys.path.insert(1, path)
make_topic_model = importlib.import_module(pyfile_1.rstrip(".py"))
create_visualisation = importlib.import_module(pyfile_2.rstrip(".py"))
from make_topic_model import *
from create_visualisation import *
''' Datasets'''
!wget -q --show-progress --no-check-certificate 'https://docs.google.com/uc?export=download&id=12ZyryF8MbMYKuhIBEhUUvnvx43_cna56' -O dataset_ValueMonitor_prototype
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=12_EoLJLL_wjc8n1Az3wudsvaTgA605aK' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=12_EoLJLL_wjc8n1Az3wudsvaTgA605aK" -O combined_STOA_technologies_saved_topic_model && rm -rf /tmp/cookies.txt
with open('dataset_ValueMonitor_prototype', "rb") as fh:
df = pickle.load(fh)
with open('combined_STOA_technologies_saved_topic_model', "rb") as fh:
combined_STOA_technologies_saved_topic_model = pickle.load(fh)
results_import = import_topic_model(combined_STOA_technologies_saved_topic_model, df)
if len(results_import):
df_with_topics = results_import[0]
topics = results_import[1]
dict_anchor_words = results_import[2]
Requirement already satisfied: corextopic in c:\anaconda3\lib\site-packages (1.1) Requirement already satisfied: joblib in c:\anaconda3\lib\site-packages (1.0.1) Requirement already satisfied: tabulate in c:\anaconda3\lib\site-packages (0.8.9) Requirement already satisfied: simple_colors in c:\anaconda3\lib\site-packages (0.1.5)
2. Gap assessment ¶
It takes time to build a good topic model in which the topics adequately represent values. The code in the next cell can be used to import an existing topic model.
In [5]:
def plot_values_in_different_datasets(Selected_technology):
    """Delegate to ``values_in_different_datasets`` for the chosen technology.

    Args:
        Selected_technology: Technology label picked in the widget
            ("AI" or "IoT" in the ``interact`` call below).

    Reads the notebook-level ``df_with_topics`` and ``dict_anchor_words``.
    """
    values_in_different_datasets(
        df_with_topics,
        Selected_technology,
        dict_anchor_words,
    )


# Offer the technologies as widget options and call the function on selection.
interact(
    plot_values_in_different_datasets,
    Selected_technology=["AI", "IoT"],
)
Out[5]:
<function __main__.plot_values_in_different_datasets(Selected_technology)>
In [9]:
def plot_print_sample_articles_topic(selected_technology, selected_value, selected_dataset, size_sample):
    """Pass articles for one technology and dataset to ``print_sample_articles_topic``.

    Args:
        selected_technology: Name of the ``df_with_topics`` column used as the
            technology flag; rows where it equals True are kept.
        selected_value: Key of ``dict_anchor_words`` picked in the widget.
        selected_dataset: Value matched against the ``'dataset'`` column.
        size_sample: Sample size forwarded to ``print_sample_articles_topic``.
    """
    # Display switches forwarded unchanged to the helper.
    show_extracts = True  # True, False
    show_full_text = False  # True, False

    # First narrow to the chosen technology (element-wise comparison) ...
    is_selected_technology = df_with_topics[selected_technology] == True
    technology_rows = df_with_topics[is_selected_technology]

    # ... then, within those rows, to the chosen dataset.
    is_selected_dataset = technology_rows['dataset'] == selected_dataset
    sample_pool = technology_rows[is_selected_dataset]

    print_sample_articles_topic(
        sample_pool,
        dict_anchor_words,
        topics,
        selected_value,
        size_sample,
        show_extracts,
        show_full_text,
    )


interact(
    plot_print_sample_articles_topic,
    selected_value=list(dict_anchor_words),
    selected_dataset=["TECH", "NEWS", "ETHICS"],
    selected_technology=["AI", "IoT"],
    size_sample=(5, 50, 5),
)
Out[9]:
<function __main__.plot_print_sample_articles_topic(selected_technology, selected_value, selected_dataset, size_sample)>
3. Impact assessment ¶
The occurrence of values can be traced over time.
In [11]:
def plot_create_vis_values_over_time(selected_technology, selected_dataset, resampling, smoothing, max_value_y):
    """Filter the articles and hand them to ``create_vis_values_over_time``.

    Args:
        selected_technology: Name of the ``df_with_topics`` column used as the
            technology flag; rows where it equals True are kept.
        selected_dataset: Value matched against the ``'dataset'`` column.
        resampling: One of "Year", "Month" or "Day"; translated to
            "Y", "M" or "D" before being forwarded.
        smoothing: Forwarded unchanged to ``create_vis_values_over_time``.
        max_value_y: Forwarded unchanged to ``create_vis_values_over_time``.

    Raises:
        KeyError: If ``resampling`` is not one of the three labels above.
    """
    # Inclusive date window applied to the 'date' column.
    T0 = "1980-01-01"  # YYYY-MM-DD
    T1 = "2023-01-01"  # YYYY-MM-DD
    values_to_include_in_visualisation = []

    # Translate the widget label into the one-letter code passed to the helper.
    resampling_dict = {"Year": "Y", "Month": "M", "Day": "D"}
    resampling = resampling_dict[resampling]

    window_start = dateutil.parser.parse(T0)
    window_end = dateutil.parser.parse(T1)
    in_window = (df_with_topics['date'] >= window_start) & (df_with_topics['date'] <= window_end)
    df_with_topics_short = df_with_topics.loc[in_window]

    # Narrow to the chosen technology, then to the chosen dataset.
    is_selected_technology = df_with_topics_short[selected_technology] == True
    technology_rows = df_with_topics_short[is_selected_technology]
    is_selected_dataset = technology_rows['dataset'] == selected_dataset
    rows_to_plot = technology_rows[is_selected_dataset]

    create_vis_values_over_time(
        rows_to_plot,
        dict_anchor_words,
        resampling,
        values_to_include_in_visualisation,
        smoothing,
        max_value_y,
    )


interact(
    plot_create_vis_values_over_time,
    selected_technology=["AI", "IoT"],
    selected_dataset=["TECH", "NEWS", "ETHICS"],
    smoothing=(0.25, 3, 0.25),
    max_value_y=(5, 100, 5),
    resampling=["Year", "Month", "Day"],
)
Out[11]:
<function __main__.plot_create_vis_values_over_time(selected_technology, selected_dataset, resampling, smoothing, max_value_y)>
In [12]:
def plot_print_sample_articles_topic(selected_value, size_sample):
    """Pass the articles inside a fixed date window to ``print_sample_articles_topic``.

    Args:
        selected_value: Key of ``dict_anchor_words`` picked in the widget.
        size_sample: Sample size forwarded to ``print_sample_articles_topic``.
    """
    # Inclusive date window applied to the 'date' column.
    T0 = "1960-01-01"  # YYYY-MM-DD
    T1 = "2023-01-01"  # YYYY-MM-DD
    # Display switches forwarded unchanged to the helper.
    show_extracts = True  # True, False
    show_full_text = False  # True, False

    window_start = dateutil.parser.parse(T0)
    window_end = dateutil.parser.parse(T1)
    in_window = (df_with_topics['date'] >= window_start) & (df_with_topics['date'] <= window_end)
    df_with_topics_short = df_with_topics.loc[in_window]

    print_sample_articles_topic(
        df_with_topics_short,
        dict_anchor_words,
        topics,
        selected_value,
        size_sample,
        show_extracts,
        show_full_text,
    )


interact(
    plot_print_sample_articles_topic,
    selected_value=list(dict_anchor_words),
    size_sample=(5, 50, 5),
)
Out[12]:
<function __main__.plot_print_sample_articles_topic(selected_value, size_sample)>
4. Values in different realms ¶
ValueMonitor can be used to evaluate which values different societal groups tend to discuss.
In [13]:
def plot_values_in_different_groups(selected_dataset):
    """Delegate to ``values_in_different_groups`` for the chosen dataset.

    Args:
        selected_dataset: Dataset label picked in the widget
            ('NEWS', 'ETHICS' or 'TECH' in the ``interact`` call below).

    Reads the notebook-level ``df_with_topics`` and ``dict_anchor_words``.
    """
    values_in_different_groups(
        df_with_topics,
        dict_anchor_words,
        selected_dataset,
    )


# Offer the datasets as widget options and call the function on selection.
interact(
    plot_values_in_different_groups,
    selected_dataset=['NEWS', 'ETHICS', 'TECH'],
)
Out[13]:
<function __main__.plot_values_in_different_groups(selected_dataset)>
In [14]:
def plot_print_sample_articles_topic(selected_value, selected_dataset, size_sample):
    """Pass the articles of one dataset to ``print_sample_articles_topic``.

    Args:
        selected_value: Key of ``dict_anchor_words`` picked in the widget.
        selected_dataset: Value matched against the ``'dataset'`` column.
        size_sample: Sample size forwarded to ``print_sample_articles_topic``.
    """
    # Display switches forwarded unchanged to the helper.
    show_extracts = True  # True, False
    show_full_text = False  # True, False
    '''--------------------------------------------------------------------------'''
    # Only the dataset is filtered here; no technology filter is applied.
    is_selected_dataset = df_with_topics['dataset'] == selected_dataset
    dataset_rows = df_with_topics[is_selected_dataset]

    print_sample_articles_topic(
        dataset_rows,
        dict_anchor_words,
        topics,
        selected_value,
        size_sample,
        show_extracts,
        show_full_text,
    )


interact(
    plot_print_sample_articles_topic,
    selected_value=list(dict_anchor_words),
    selected_dataset=["TECH", "NEWS", "ETHICS"],
    size_sample=(5, 50, 5),
)
Out[14]:
<function __main__.plot_print_sample_articles_topic(selected_value, selected_dataset, size_sample)>
In [ ]: