# Source code for bluesearch.widgets.search_widget
"""The Search widget."""
# Blue Brain Search is a text mining toolbox focused on scientific use cases.
#
# Copyright (C) 2020 Blue Brain Project, EPFL.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import datetime
import enum
import functools
import json
import logging
import math
import pathlib
import sys
import textwrap
from urllib.parse import quote
import ipywidgets as widgets
import pandas as pd
import requests
from IPython.display import HTML, display
from .._css import style
from ..sql import (
get_titles,
retrieve_article_metadata_from_article_id,
retrieve_paragraph_from_sentence_id,
retrieve_sentences_from_sentence_ids,
)
from ..utils import Timer
logger = logging.getLogger(__name__)
class _Save(enum.Enum):
NOTHING = enum.auto()
PARAGRAPH = enum.auto()
ARTICLE = enum.auto()
class SearchWidget(widgets.VBox):
"""Widget for search engine.
Parameters
----------
bbs_search_url : str
The URL of the bbs_search server.
bbs_mysql_engine : sqlalchemy.engine.Engine
Engine for connections to the bbs_mysql server.
article_saver : bluesearch.widgets.ArticleSaver, optional
If specified, this article saver will keep all the article_id
of interest for the user during the different queries.
results_per_page : int, optional
The number of results to display per results page.
checkpoint_path : str or pathlib.Path, optional
Path where checkpoints are saved to and loaded from. If `None`, defaults
to `~/.cache/bluesearch/widgets_checkpoints`.
"""
def __init__(
    self,
    bbs_search_url,
    bbs_mysql_engine,
    article_saver=None,
    results_per_page=10,
    checkpoint_path=None,
):
    super().__init__()

    # Collaborators handed in by the caller.
    self.bbs_search_url = bbs_search_url
    self.bbs_mysql_engine = bbs_mysql_engine
    self.article_saver = article_saver

    # Paging state; at least one result is shown per page.
    self.results_per_page = max(1, results_per_page)
    self.n_pages = 1
    self.current_page = -1

    # Labels shown next to each saving choice.
    self.saving_labels = {
        _Save.NOTHING: "Do not take this article",
        _Save.PARAGRAPH: "Extract the paragraph",
        _Save.ARTICLE: "Extract the entire article",
    }

    # Search state, filled in by the callbacks.
    self.radio_buttons = []
    self.current_sentence_ids = []
    self.history = []

    # Ask the server which models, database and version it provides.
    help_response = requests.post(self.bbs_search_url + "/help")
    if not help_response.ok:
        raise Exception(
            f"It seems there is an issue with the bbs search server. Response "
            f"status is {help_response.status_code} : {help_response.text}"
        )
    server_info = help_response.json()
    self.supported_models = server_info["supported_models"]
    self.database_name = server_info["database"]  # e.g "cord19_v47"
    self.search_server_version = server_info["version"]  # e.g. "0.0.9.dev2+g69"

    # Build the controls, then lay them out.
    self.widgets_style = {"description_width": "initial"}
    self.widgets = {}
    self._init_widgets()
    self._init_ui()

    # Resolve the checkpoint file and make sure its directory exists.
    if checkpoint_path is None:
        checkpoint_dir = (
            pathlib.Path.home() / ".cache" / "bluesearch" / "widgets_checkpoints"
        )
    else:
        checkpoint_dir = pathlib.Path(checkpoint_path)
    self.checkpoint_path = checkpoint_dir / "bbs_search.json"
    self.checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
def _init_widgets(self):
    """Create all controls of the widget and register their callbacks.

    Every control is stored in ``self.widgets`` under a string key. The
    advanced settings are grouped into a two-tab widget that starts
    hidden and is toggled by the "Show advanced settings" checkbox.
    """
    # Select model to compute Sentence Embeddings
    self.widgets["sent_embedder"] = widgets.RadioButtons(
        options=self.supported_models,
        description="Model for Sentence Embedding",
        style=self.widgets_style,
        layout=widgets.Layout(width="450px", height="50px"),
    )

    # Select granularity of the search
    self.widgets["granularity"] = widgets.ToggleButtons(
        options=["sentences", "articles"],
        value="articles",
        disabled=False,
        style={"description_width": "initial", "button_width": "80px"},
        description="",
    )

    # Select n. of top results to return
    self.widgets["top_results"] = widgets.IntText(
        value=20, description="Show top ", style=self.widgets_style
    )

    # Choose whether to print whole paragraph containing sentence
    # highlighted, or just the sentence
    self.widgets["print_paragraph"] = widgets.Checkbox(
        value=True, description="Show whole paragraph", style=self.widgets_style
    )

    # Enter Query
    self.widgets["query_text"] = widgets.Textarea(
        value="Glucose is a risk factor for COVID-19",
        layout=widgets.Layout(width="90%", height="80px"),
        description="Query",
        style=self.widgets_style,
    )

    # Filtering options
    self.widgets["has_journal"] = widgets.Checkbox(
        description="Only articles from journals",
        value=True,
        style=self.widgets_style,
    )
    self.widgets["is_english"] = widgets.Checkbox(
        description="Only articles in English", value=True, style=self.widgets_style
    )
    self.widgets["discard_bad_sentences"] = widgets.Checkbox(
        description="Discard sentences flagged as bad quality",
        value=True,
        style=self.widgets_style,
    )
    self.widgets["date_range"] = widgets.IntRangeSlider(
        description="Date Range:",
        continuous_update=False,
        min=1850,
        max=2020,
        value=(2000, 2020),
        layout=widgets.Layout(width="80ch"),
        style=self.widgets_style,
    )

    # Enter Deprioritization Query
    self.widgets["deprioritize_text"] = widgets.Textarea(
        value="",
        layout=widgets.Layout(width="90%", height="80px"),
        description="Deprioritize",
        style=self.widgets_style,
    )

    # Select Deprioritization Strength
    self.widgets["deprioritize_strength"] = widgets.RadioButtons(
        options=[
            "None",
            "Mild",
            "Stronger",
        ],  # ['None', 'Weak', 'Mild', 'Strong', 'Stronger']
        disabled=False,
        style={"description_width": "initial", "button_width": "80px"},
        description="Deprioritization strength",
    )

    # Enter Substrings Exclusions (created but not displayed)
    self.widgets["exclusion_text"] = widgets.Textarea(
        layout=widgets.Layout(width="90%"),
        value="",
        style=self.widgets_style,
        description="Substring Exclusion (newline separated): ",
        rows=5,
    )
    self.widgets["exclusion_text"].layout.display = "none"

    # Enter phrases that must be matched exactly
    self.widgets["inclusion_text"] = widgets.Textarea(
        layout=widgets.Layout(width="90%"),
        value="",
        style=self.widgets_style,
        description="Exact phrase matching:",
        rows=5,
        placeholder=textwrap.dedent(
            """
            Case insensitive, one phrase per line. Valid phrases are:
            1. Single word : glucose
            2. Multiple words : risk factor
            3. Single word with variable suffix : molecul*
            (matches "molecule", "molecules", "molecular")
            """
        ).strip(),
    )

    # Saving choice applied by default to new search results
    self.widgets["default_value_article_saver"] = widgets.RadioButtons(
        options=[
            (self.saving_labels[_Save.NOTHING], _Save.NOTHING),
            (self.saving_labels[_Save.PARAGRAPH], _Save.PARAGRAPH),
            (self.saving_labels[_Save.ARTICLE], _Save.ARTICLE),
        ],
        value=_Save.ARTICLE,
        disabled=False,
        style={"description_width": "initial", "button_width": "200px"},
        description="Default saving: ",
    )

    # Click to run Information Retrieval!
    self.widgets["investigate_button"] = widgets.Button(
        description="📚 Search Literature!",
        layout=widgets.Layout(width="350px", height="50px"),
    )
    self.widgets["investigate_button"].add_class("bbs_button")

    # Click to Save results
    self.widgets["save_button"] = widgets.Button(
        description="Save",
        icon="download",
        layout=widgets.Layout(width="172px", height="40px"),
    )
    self.widgets["save_button"].add_class("bbs_button")

    # Click to Load results
    self.widgets["load_button"] = widgets.Button(
        description="Load",
        icon="upload",
        layout=widgets.Layout(width="172px", height="40px"),
    )
    self.widgets["load_button"].add_class("bbs_button")

    # Click to run Generate Report!
    self.widgets["report_button"] = widgets.Button(
        description="Generate Report of Search Results",
        layout=widgets.Layout(width="50%"),
    )
    self.widgets["articles_button"] = widgets.Button(
        description="Generate Report of Selected Articles",
        layout=widgets.Layout(width="50%"),
    )

    # Output Area
    self.widgets["out"] = widgets.Output(layout={"border": "1px solid black"})

    # Status Area
    self.widgets["status"] = widgets.Output(
        layout={"border": "1px solid black", "flex": "1"}
    )
    self.widgets["status_clear"] = widgets.Button(
        description="Clear", layout={"max_width": "100px"}
    )
    self.widgets["status_clear"].on_click(
        lambda b: self.widgets["status"].clear_output()
    )

    # Page buttons
    self.widgets["page_back"] = widgets.Button(
        description="←", layout={"width": "auto"}
    )
    self.widgets["page_label"] = widgets.Label(value="Page - of -")
    self.widgets["page_forward"] = widgets.Button(
        description="→", layout={"width": "auto"}
    )
    self.widgets["page_back"].on_click(
        lambda b: self.set_page(self.current_page - 1)
    )
    self.widgets["page_forward"].on_click(
        lambda b: self.set_page(self.current_page + 1)
    )

    # Put advanced settings to a tab
    tabs = (
        (
            "Search / View",
            [
                self.widgets["sent_embedder"],
                widgets.HBox(
                    children=[
                        self.widgets["top_results"],
                        self.widgets["granularity"],
                    ]
                ),
                self.widgets["print_paragraph"],
                self.widgets["default_value_article_saver"],
            ],
        ),
        (
            "Filtering",
            [
                self.widgets["has_journal"],
                self.widgets["is_english"],
                self.widgets["discard_bad_sentences"],
                self.widgets["date_range"],
                self.widgets["deprioritize_text"],
                self.widgets["deprioritize_strength"],
                self.widgets["exclusion_text"],
                self.widgets["inclusion_text"],
            ],
        ),
    )
    # Create all tab pages in one go, then title them by position.
    tab_widget = widgets.Tab(
        children=[widgets.VBox(tab_children) for _, tab_children in tabs]
    )
    tab_widget.layout.display = "none"
    for i, (tab_name, _) in enumerate(tabs):
        tab_widget.set_title(i, tab_name)
    self.widgets["advanced_settings"] = tab_widget

    # Checkbox that shows or hides the advanced settings tab
    self.widgets["show_advanced_chb"] = widgets.Checkbox(
        value=False,
        description="Show advanced settings",
    )

    # Callbacks
    self.widgets["investigate_button"].on_click(self._cb_bt_investigate)
    self.widgets["save_button"].on_click(self._cb_bt_save)
    self.widgets["load_button"].on_click(self._cb_bt_load)
    self.widgets["report_button"].on_click(self._cb_bt_make_report_search)
    self.widgets["articles_button"].on_click(self._cb_bt_make_report_article_saver)
    self.widgets["show_advanced_chb"].observe(self._cb_chkb_advanced, names="value")
def _init_ui(self):
    """Inject the CSS, arrange the child widgets and print the banner."""
    display(HTML(f"<style> {style.get_css_style()} </style>"))

    # The same pager row is placed both above and below the results.
    pager = widgets.HBox(
        children=[
            self.widgets["page_back"],
            self.widgets["page_label"],
            self.widgets["page_forward"],
        ]
    )
    save_load_row = widgets.HBox(
        children=(self.widgets["save_button"], self.widgets["load_button"])
    )
    status_row = widgets.HBox(
        children=(self.widgets["status"], self.widgets["status_clear"])
    )

    self.children = [
        self.widgets["query_text"],
        self.widgets["show_advanced_chb"],
        self.widgets["advanced_settings"],
        self.widgets["investigate_button"],
        save_load_row,
        pager,
        self.widgets["out"],
        pager,
        status_row,
        self.widgets["report_button"],
        self.widgets["articles_button"],
    ]

    # Welcome banner shown in the output area until the first search.
    banner = r"""
        ____ ____ _____
        | _ \| _ \ / ____|
        | |_) | |_) | (___
        | _ <| _ < \___ \
        | |_) | |_) |____) |
        |____/|____/|_____/
        Click on "Search Literature!" button to display some results.
        """
    with self.widgets["out"]:
        print(textwrap.dedent(banner))
@staticmethod
def highlight_in_paragraph(paragraph, sentence):
    """Wrap the first occurrence of a sentence in emphasis markup.

    Parameters
    ----------
    paragraph : str
        The paragraph that contains the sentence.
    sentence : str
        The sentence to emphasize.

    Returns
    -------
    formatted_paragraph : str
        HTML for the paragraph, with the sentence enclosed in a
        ``paragraph_emph`` element.

    Raises
    ------
    ValueError
        If `sentence` does not occur in `paragraph`.
    """
    start = paragraph.index(sentence)
    stop = start + len(sentence)
    before, match, after = paragraph[:start], paragraph[start:stop], paragraph[stop:]
    return (
        "\n"
        '<div class="paragraph">\n'
        f"{before}\n"
        f'<div class="paragraph_emph"> {match} </div>\n'
        f"{after}\n"
        "</div>\n"
    )
def _fetch_result_info(self, sentence_id):
    """Fetch information for a sentence ID from the database.

    Missing or non-string values for authors, URL, title and section name
    (for example ``None`` or NaN) are replaced by safe fallbacks instead of
    raising.

    Parameters
    ----------
    sentence_id : int
        The sentence_id for a search result.

    Returns
    -------
    result_info : dict
        A dictionary containing the following fields:
        "sentence_id"
        "paragraph_id"
        "article_id"
        "article_title"
        "article_auth"
        "ref"
        "section_name"
        "text"
    """
    sentence = retrieve_sentences_from_sentence_ids(
        sentence_ids=(sentence_id,), engine=self.bbs_mysql_engine
    )
    article_id, section_name, text, paragraph_id = sentence.iloc[0][
        ["article_id", "section_name", "text", "paragraph_pos_in_article"]
    ]
    article = retrieve_article_metadata_from_article_id(
        article_id=article_id, engine=self.bbs_mysql_engine
    )
    article_auth, article_title, ref = article.iloc[0][["authors", "title", "url"]]

    # Only the first author is shown; no author string means no label.
    if isinstance(article_auth, str):
        article_auth = article_auth.split(";")[0] + " et al."
    else:
        article_auth = ""

    # Use the first URL; without one, link to a web search for the title.
    if isinstance(ref, str):
        ref = ref.split(";")[0]
    else:
        search_terms = article_title if isinstance(article_title, str) else ""
        ref = "https://www.google.com/search?q=" + quote(search_terms)

    # Downstream formatting calls string methods on the section name.
    if not isinstance(section_name, str):
        section_name = ""

    result_info = {
        "sentence_id": sentence_id,
        "paragraph_id": int(paragraph_id),
        "article_id": article_id,
        "article_title": article_title,
        "article_auth": article_auth,
        "ref": ref,
        "section_name": section_name,
        "text": text,
    }
    return result_info
def print_single_result(self, result_info, print_whole_paragraph):
    """Build the HTML fragments for a single search result.

    Parameters
    ----------
    result_info : dict
        The information for a single result obtained by calling
        `_fetch_result_info`.
    print_whole_paragraph : bool
        If true, the paragraph containing the sentence is retrieved and
        returned with the sentence highlighted. Otherwise only the
        sentence, wrapped to 80 columns, is returned.

    Returns
    -------
    article_metadata : str
        Formatted string containing the metadata of the article.
    formatted_output : str
        Formatted output of the sentence.
    """
    sentence_id, text, ref, article_title, article_auth, section_name = (
        result_info[key]
        for key in (
            "sentence_id",
            "text",
            "ref",
            "article_title",
            "article_auth",
            "section_name",
        )
    )

    if not print_whole_paragraph:
        formatted_output = textwrap.fill(text, width=80)
    else:
        try:
            paragraph = retrieve_paragraph_from_sentence_id(
                sentence_id, self.bbs_mysql_engine
            )
            formatted_output = self.highlight_in_paragraph(paragraph, text)
        except Exception as err:
            # Best effort: fall back to the sentence and report the error.
            formatted_output = (
                "\n"
                "There was a problem retrieving the paragraph.\n"
                f"The original sentence is: {text}\n"
                f"The error was: {err}\n"
            )

    section_label = section_name.lower().title()
    metadata_html = (
        "\n"
        f'<a href="{ref}">\n'
        '<div class="article_title">\n'
        f"{article_title}\n"
        "</div>\n"
        "</a>\n"
        '<div class="metadata">\n'
        f"{article_auth} · {section_label}\n"
        "</div>\n"
    )
    article_metadata = textwrap.dedent(metadata_html)
    return article_metadata, formatted_output
def _collect_search_configuration(self):
"""Read the search configuration from the widget components.
Returns
-------
search_configuration : dict
The search configuration.
"""
search_configuration = {
"which_model": self.widgets["sent_embedder"].value,
"k": self.widgets["top_results"].value,
"query_text": self.widgets["query_text"].value,
"granularity": self.widgets["granularity"].value,
"has_journal": self.widgets["has_journal"].value,
"is_english": self.widgets["is_english"].value,
"discard_bad_sentences": self.widgets["discard_bad_sentences"].value,
"date_range": self.widgets["date_range"].value,
"deprioritize_text": self.widgets["deprioritize_text"].value,
"deprioritize_strength": self.widgets["deprioritize_strength"].value,
"exclusion_text": self.widgets["exclusion_text"].value
if "exclusion_text" in self.widgets.keys()
else "",
"inclusion_text": self.widgets["inclusion_text"].value,
"verbose": False,
}
return search_configuration
def _query_search_server(self, search_configuration):
    """Send the search configuration to the server and return its answer.

    Parameters
    ----------
    search_configuration : dict
        The search configuration.

    Returns
    -------
    result : dict or None
        The decoded JSON body of the response if the query succeeded.
        None if the connection failed or the server answered with an
        HTTP error; the problem is then printed to stderr.
    """
    try:
        response = requests.post(self.bbs_search_url, json=search_configuration)
        response.raise_for_status()  # if not response.ok
    except requests.ConnectionError as err:
        print(f"Could not connect to the search server.\n\n{err}", file=sys.stderr)
        return None
    except requests.HTTPError as err:
        # raised by response.raise_for_status()
        print(f"There was an HTTP error.\n\n{err}", file=sys.stderr)
        return None
    return response.json()
def _cb_bt_investigate(self, change_dict):
    """Run a search and show its first results page (button callback)."""
    timer = Timer()

    def announce(step):
        # Step name without a newline; the duration is appended later.
        print(step.ljust(50), end="", flush=True)

    def report(timer_name):
        print(f"{timer[timer_name]:7.2f} seconds")

    # Clear output and show waiting message
    self.widgets["out"].clear_output()
    self.widgets["status"].clear_output()
    with self.widgets["status"]:
        search_configuration = self._collect_search_configuration()
        query_text = search_configuration["query_text"]
        header = f'Query: "{query_text}"'
        print(header)
        print("-" * len(header))
        print(f"INFO: Database {self.database_name} is used for the search query.")

        announce("Sending query to server...")
        with timer("server query"):
            response = self._query_search_server(search_configuration)
            if response is None:
                # The failure was already reported by the query helper.
                return
            self.current_sentence_ids = response["sentence_ids"]
        report("server query")

        announce("Processing search results...")
        with timer("default saving"):
            self._process_search_results()
        report("default saving")

        announce("Updating the results display...")
        with timer("update page"):
            self.n_pages = math.ceil(
                len(self.current_sentence_ids) / self.results_per_page
            )
            self.set_page(0, force=True)
        report("update page")
        print("Done.")
def _cb_bt_save(self, change_dict):
    """Write the article saver state and search history to the checkpoint.

    An error is shown in the status area, and nothing is written, when
    there is no article saver, when its state is empty, or when no search
    has been recorded in the history.

    Parameters
    ----------
    change_dict
        Argument passed by the button click; unused.
    """
    with self.widgets["status"]:
        self.widgets["status"].clear_output()
        # The article saver is optional, so guard before reading its state.
        nothing_selected = self.article_saver is None or not self.article_saver.state
        if nothing_selected or not self.history:
            message = (
                "No articles or paragraphs selected. Did you forget\n"
                "to run your query or select some search results?"
            )
            display(HTML(f'<div class="bbs_error"> <b>ERROR!</b> {message} </div>'))
            return

        display(HTML("Saving search results to disk... "))
        data = {
            "article_saver_state": list(self.article_saver.state),
            "search_widget_history": self.history,
            "database_name": self.database_name,
            "search_server_version": self.search_server_version,
        }
        with self.checkpoint_path.open("w") as f:
            json.dump(data, f)

        # Replace the progress message with the final one.
        self.widgets["status"].clear_output()
        display(
            HTML(
                "Saving search results to disk... "
                '<b class="bbs_success"> DONE!</b></br>'
            )
        )
def _cb_bt_load(self, change_dict):
    """Restore the article saver state and search history from the checkpoint.

    An error is shown in the status area when the checkpoint file does not
    exist. The article saver state is restored only when an article saver
    is attached. A warning is shown when the checkpoint was written with a
    different database or search server version than the current one.

    Parameters
    ----------
    change_dict
        Argument passed by the button click; unused.
    """
    with self.widgets["status"]:
        self.widgets["status"].clear_output()
        if not self.checkpoint_path.exists():
            message = (
                "No checkpoint file found to load. Did you forget to\n"
                "save your search results?"
            )
            display(
                HTML(f'<div class="bbs_error"> <b>ERROR!</b> {message} </div>')
            )
            return

        display(HTML("Loading search results from disk... "))
        with self.checkpoint_path.open("r") as f:
            data = json.load(f)

        # JSON stores tuples as lists, so convert the entries back.
        if self.article_saver is not None:
            self.article_saver.state = {
                tuple(t) for t in data["article_saver_state"]
            }
        self.history = [tuple(entry) for entry in data["search_widget_history"]]

        # Replace the progress message with the final one.
        self.widgets["status"].clear_output()
        display(
            HTML(
                "Loading search results from disk... "
                '<b class="bbs_success"> DONE!</b></br>'
            )
        )

        # Warn when the checkpoint comes from a different setup.
        vers_load = data["search_server_version"]
        vers_curr = self.search_server_version
        db_load = data["database_name"]
        db_curr = self.database_name
        if db_load != db_curr or vers_load != vers_curr:
            message = (
                "Loaded data from\n"
                "<ul>\n"
                f"<li> search server version = {vers_load} </li>\n"
                f"<li> database version = {db_load} </li>\n"
                "</ul>\n"
                "but current widget is connected to\n"
                "<ul>\n"
                f"<li> search server version = {vers_curr} </li>\n"
                f"<li> database version = {db_curr} </li>\n"
                "</ul>\n"
            )
            display(
                HTML(
                    f'<div class="bbs_warning"> '
                    f"<b>WARNING!</b> {message} </div>"
                )
            )
def _process_search_results(self):
    """Record the current results and apply the default saving choice.

    Every current sentence is appended to the search history. If an
    article saver is attached, the article or the paragraph of each
    result is added to it, depending on the value of the default saving
    control.
    """
    saving_choice = self.widgets["default_value_article_saver"].value
    sentences = retrieve_sentences_from_sentence_ids(
        sentence_ids=self.current_sentence_ids,
        engine=self.bbs_mysql_engine,
        keep_order=True,
    )
    saver = self.article_saver
    for row in sentences.itertuples(index=False):
        article_id = row.article_id
        paragraph_pos = row.paragraph_pos_in_article
        self.history.append((article_id, paragraph_pos, row.sentence_id))

        if saver is None:
            continue
        if saving_choice == _Save.ARTICLE:
            saver.add_article(article_id)
        elif saving_choice == _Save.PARAGRAPH:
            saver.add_paragraph(article_id, paragraph_pos)
def saved_results(self):
    """Build a table of the history items that are flagged for saving.

    Returns
    -------
    saved_items_df : pd.DataFrame
        A data frame with all saved search results.
    """
    # Titles for all articles in the history are fetched in one call.
    titles = get_titles([entry[0] for entry in self.history], self.bbs_mysql_engine)

    check = {True: "✓", False: ""}
    saver = self.article_saver
    rows = []
    for article_id, paragraph_pos, _sentence_id in self.history:
        # Without an article saver nothing counts as saved.
        paragraph_saved = saver is not None and saver.has_paragraph(
            article_id, paragraph_pos
        )
        article_saved = saver is not None and saver.has_article(article_id)

        # Items that are not saved are left out of the table.
        if not (paragraph_saved or article_saved):
            continue

        rows.append(
            (
                article_id,
                # The paragraph position is shown only for saved paragraphs.
                paragraph_pos if paragraph_saved else "",
                check[paragraph_saved],
                check[article_saved],
                titles[article_id],
            )
        )

    saved_items_df = pd.DataFrame(
        rows,
        columns=["Article ID", "Paragraph #", "Paragraph", "Article", "Title"],
    )
    return saved_items_df
def set_page(self, new_page, force=False):
    """Show the given page of the results view.

    Parameters
    ----------
    new_page : int
        The zero-based number of the page to show. Values outside the
        valid range are clamped to the first or last page.
    force : bool
        By default the page is not reloaded if it is already the one
        being viewed. Set this to True to reload it anyway, which is
        useful when new results have been fetched and the view needs
        to be updated.
    """
    last_page = self.n_pages - 1
    target_page = max(0, min(new_page, last_page))
    if not force and target_page == self.current_page:
        return

    self.current_page = target_page
    self.widgets["page_label"].value = f"Page {target_page + 1} of {self.n_pages}"
    self._update_page_display()
def _update_page_display(self):
    """Redraw the output area with the results of the current page."""
    output_area = self.widgets["out"]
    with output_area:
        show_paragraph = self.widgets["print_paragraph"].value
        self.radio_buttons = []
        output_area.clear_output()

        first = self.current_page * self.results_per_page
        page_ids = self.current_sentence_ids[first : first + self.results_per_page]
        for sentence_id in page_ids:
            info = self._fetch_result_info(sentence_id)
            metadata_html, body_html = self.print_single_result(info, show_paragraph)

            # Saving checkboxes are only offered with an article saver.
            checkboxes = ()
            if self.article_saver:
                chk_article, chk_paragraph = self._create_saving_checkboxes(
                    info["article_id"], info["paragraph_id"]
                )
                checkboxes = (chk_paragraph, chk_article)

            display(HTML(metadata_html))
            for checkbox in checkboxes:
                display(checkbox)
            display(HTML(body_html))
            print()
def _cb_chkb_save_paragraph(self, change, article_id=None, paragraph_id=None):
if change["new"] is True:
self.article_saver.add_paragraph(article_id, paragraph_id)
else:
self.article_saver.remove_paragraph(article_id, paragraph_id)
def _cb_chkb_save_article(self, change, article_id=None):
if change["new"] is True:
self.article_saver.add_article(article_id)
else:
self.article_saver.remove_article(article_id)
def _cb_chkb_advanced(self, change_dict):
if change_dict["new"]:
self.widgets["advanced_settings"].layout.display = "block"
else:
self.widgets["advanced_settings"].layout.display = "none"
def _create_saving_checkboxes(self, article_id, paragraph_id):
    """Create the paragraph and article saving checkboxes for one result.

    Parameters
    ----------
    article_id
        Identifier of the article of the search result.
    paragraph_id
        Position of the paragraph of the search result in the article.

    Returns
    -------
    chk_article : ipywidgets.Checkbox
        Checkbox for saving the entire article.
    chk_paragraph : ipywidgets.Checkbox
        Checkbox for saving the paragraph.
    """

    def unchecked_box(saving_kind):
        return widgets.Checkbox(
            value=False,
            description=self.saving_labels[saving_kind],
            indent=False,
            disabled=False,
        )

    chk_paragraph = unchecked_box(_Save.PARAGRAPH)
    chk_article = unchecked_box(_Save.ARTICLE)

    on_paragraph_change = functools.partial(
        self._cb_chkb_save_paragraph,
        article_id=article_id,
        paragraph_id=paragraph_id,
    )
    on_article_change = functools.partial(
        self._cb_chkb_save_article, article_id=article_id
    )
    chk_paragraph.observe(handler=on_paragraph_change, names="value")
    chk_article.observe(handler=on_article_change, names="value")

    saver = self.article_saver
    if saver is None:
        # Nothing can be saved without an article saver.
        chk_paragraph.disabled = True
        chk_article.disabled = True
        return chk_article, chk_paragraph

    # Check if this article/paragraph has been saved before.
    # NOTE(review): the observers are already registered at this point, so
    # setting a value to True presumably invokes the save callbacks again;
    # confirm that this is intended.
    if saver.has_paragraph(article_id, paragraph_id):
        chk_paragraph.value = True
    if saver.has_article(article_id):
        chk_article.value = True
    return chk_article, chk_paragraph
def _cb_bt_make_report_article_saver(self, change_dict):
"""Create the saved articles report."""
with self.widgets["status"]:
print()
print("Creating the saved results report... ")
out_file = self.article_saver.make_report()
print(f"Done. Report saved to {out_file}.")
def _cb_bt_make_report_search(self, change_dict):
    """Create the report of the search.

    The report is an HTML file written to the current working directory.
    It lists the value of every control that has one, followed by all
    current search results. The file is written as UTF-8 and declares
    that encoding.

    Parameters
    ----------
    change_dict
        Argument passed by the button click; unused.
    """
    with self.widgets["status"]:
        print()
        print("Creating the search results report... ")

        # One list item per control that carries a value.
        parameter_items = [
            '<div class="paragraph_emph">'
            + " ".join(name.split("_")).title()
            + "</div>"
            + f": {widget.value!r}"
            for name, widget in self.widgets.items()
            if hasattr(widget, "value")
        ]
        items_html = "</li> <li>".join(parameter_items)
        hyperparameters_section = (
            "\n"
            "<h1> Search Parameters </h1>\n"
            '<ul class="paragraph">\n'
            f"<li> {items_html}\n"
            "</li>\n"
            "</ul>\n"
        )

        print_whole_paragraph = self.widgets["print_paragraph"].value
        result_fragments = []
        for sentence_id in self.current_sentence_ids:
            result_info = self._fetch_result_info(sentence_id)
            article_metadata, formatted_output = self.print_single_result(
                result_info, print_whole_paragraph
            )
            result_fragments.append(article_metadata + formatted_output + "<br>")
        report = "".join(result_fragments)
        results_section = f"<h1> Results </h1> {report}"

        css_style = style.get_css_style()
        output_file = pathlib.Path(f"report_{datetime.datetime.now()}.html")
        # The report contains non-ASCII text, so fix the encoding explicitly
        # instead of relying on the platform default.
        with output_file.open("w", encoding="utf-8") as f:
            f.write("<!DOCTYPE html>\n")
            f.write('<meta charset="utf-8">\n')
            f.write(f"<style> {css_style} </style>")
            f.write(hyperparameters_section)
            f.write(results_section)
        print(f"Done. Report saved to {output_file}.")