# Source code for snowballing.config

"""This module configures the snowballing.

Please, use the database __init__ to replace these configurations.
"""
import textwrap
from copy import copy
from pathlib import Path

from string import ascii_lowercase
from bibtexparser.customization import homogenize_latex_encoding
from IPython.display import HTML
from selenium import webdriver

from .collection_helpers import callable_get, define_cvar
from .collection_helpers import consume, setitem, remove_empty
from .config_helpers import reorder_place, last_name_first_author
from .config_helpers import var_item, str_list, str_item, sequence
from .config_helpers import work_by_varname, find_work_by_info, Site
from .config_helpers import generate_title
from .rules import ModifyRules
from .utils import compare_str, match_any, to_list

# Tool version
JOHN_SNOW_VERSION = "1.0.0"

# Database path
DATABASE_DIR = Path.home() / "database"

# Text editor path
TEXT_EDITOR = None
# Text editor argument for opening in a given line.
# Use a format string with arguments {year_path} and {line}
LINE_PARAMS = None

# Web Driver
# Zero-argument factory: each call builds a new selenium Chrome driver.
WEB_DRIVER = lambda: webdriver.Chrome()

# Run widget
# Use True to indicate that widgets that generate executable code should have a text area with a button for running the code.
# Otherwise, they modify notebook cells (unsafe)
RUN_WIDGET = True 

# PDF Extractor. Use string argument {path} to define the path
# NOTE(review): the None assignment is immediately overwritten by the next
# line, so the effective value is the java command -- confirm which default
# is intended.
PDF_EXTRACTOR = None
PDF_EXTRACTOR = "java -jar refExtractor.jar full {path}"

# List of possible work class tuples
# Each tuple has the following elements:
#   Class Name
#   Category name
#   Graph visibility (Options: display, hide, always_hide)
#   Graph node color
#   Graph node text color
CLASSES = [
    ("Work", "work", "display", "#FFD86E", "black"),
    ("WorkSnowball", "snowball", "display", "#6DCE9E", "white"),
    ("WorkOk", "ok", "display", "#68BDF6", "white"),
    ("WorkUnrelated", "unrelated", "hide", "#DE9BF9", "white"),
    ("WorkNoFile", "nofile", "hide", "#A5ABB6", "white"),
    ("WorkLang", "lang", "hide", "#ff8040", "white"),
    ("Site", "site", "hide", "#000080", "white"),
    ("Email", "site", "hide", "#000080", "white"),
]
# Default class for insertion
DEFAULT_CLASS = "Work"

# Similarity ratio for matching places
SIMILARITY_RATIO = 0.8

# Check Deprecation
CHECK_DEPRECATION = True


### Fields

# Debug fields during BibTeX export
# When True, _work_to_bibtex_middle prints a "[DEBUG WARNING]" line for every
# work attribute that matches neither the "<use>" nor the "<ignore>" list.
DEBUG_FIELDS = True

# List of exportable work fields to BibTeX
WORK_FIELDS = [
    "entrytype", "year", "name", "authors", "place",
    "booktitle", "bookauthors", "edition", "available",
    "volume", "number", "section", "pp", "article",
    "doi", "isbn",  "proceedings", "issn",
    "organization", "publisher", "school", "institution", "track",
    "ref", "local", "editors", "awards",
    "special", "website", "link", "scholar", "shorttitle", "address",
]

# Regexes, treated as if they start with ^ and end with $
BIBTEX_IGNORE_FIELDS = [
    "excerpt", "month", "bookname", "url", "ID",

    # Tool
    "_.*", "force_.*", "file.*", "category", "alias", "aliases", "scholar_ok",
    "scholar", "cluster_id", "scholar_id", "display", "metakey", "due", "_tyear",
    "citation_file", "notes", "tracking", "snowball", "request", "_draw",
    "may_be_related_to", "note",
]


### Transformation Rules

def _place_value(place1):
    """Get place value based on similarity

    The description is normalized with ``reorder_place`` and scored against
    every place yielded by ``load_places_vars``: the score is the
    ``compare_str`` ratio against the place name, or 1 when the description
    is exactly the place acronym.

    It considers a match if the best score is >= SIMILARITY_RATIO (0.8 by
    default) or if the name of the best candidate is contained in the
    description.

    Returns the variable name of the matching place, or None when nothing
    matches or when there are no places to compare against.

    Doctest:

    .. doctest::

        >>> _place_value("IPAW")
        'IPAW'
        >>> _place_value("Software Engineering, International Conference on")
        'ICSE'
        >>> _place_value("A random conference") is None
        True
    """
    from .operations import load_places_vars
    place = reorder_place(place1)

    # Rank by (score, varname) only: place objects are not guaranteed to be
    # orderable, so they must never take part in the comparison. `default`
    # keeps an empty place list from raising ValueError.
    best = max(
        (
            (
                max(
                    compare_str(place, varvalue.name),
                    1 if place == varvalue.acronym else 0
                ),
                varname,
                varvalue,
            )
            for varname, varvalue in load_places_vars()
        ),
        key=lambda candidate: candidate[:2],
        default=None,
    )
    if best is None:
        return None

    score, varname, varvalue = best
    if score >= SIMILARITY_RATIO or varvalue.name in place:
        return varname
    return None
    
def _pos_diff(work, info, result):
    """Remove from diff if result in list or alias

    Post-processes ``result`` in place. ``result["set"]`` maps each key to a
    pair that is unpacked here as ``(after, before)``; ``result["show"]`` is
    pruned together with it.

    Entries are dropped when:

    * the key is listed in ``work.ignore`` (only when it is a list)
    * it is the scholar_ok key and ``before`` is truthy
    * it is the name/authors key and ``after`` equals the value in an alias
    * ``after`` equals ``before`` or is contained in a list/tuple ``before``

    The scholar URL entry is kept, but "hl=pt-BR" is replaced by "hl=en" on
    both sides.

    ``info`` is not used by this default implementation.
    """
    pending = set(result["set"])

    def delkey(key, from_set=True):
        if key in result["set"]:
            del result["set"][key]
        if key in result["show"]:
            del result["show"][key]
        if from_set:
            # discard: ignored keys may be absent from the diff, and a key
            # may be deleted more than once.
            pending.discard(key)

    def to_english(urls):
        # Normalize the interface language and collapse 1-item lists.
        urls = [url.replace("hl=pt-BR", "hl=en") for url in to_list(urls or [])]
        return urls[0] if len(urls) == 1 else urls

    if hasattr(work, "ignore") and isinstance(work.ignore, list):
        for key in work.ignore:
            delkey(key)

    scholar_ok_key = SCHOLAR_MAP["scholar_ok"]
    if scholar_ok_key in pending:
        after, before = result["set"][scholar_ok_key]
        if before:
            delkey(scholar_ok_key)

    scholar_key = SCHOLAR_MAP["scholar"]
    if scholar_key in pending:
        after, before = result["set"][scholar_key]
        result["set"][scholar_key] = (to_english(after), to_english(before))

    aliases = get_work_aliases(work)
    name_key = ATTR["name"]
    if name_key in pending:
        after, before = result["set"][name_key]
        for alias in aliases:
            if alias[1] == after:
                delkey(name_key)
                break
    if "authors" in pending:
        after, before = result["set"]["authors"]
        for alias in aliases:
            if len(alias) > 2 and alias[2] == after:
                delkey("authors")
                break

    for key in pending:
        after, before = result["set"][key]
        if isinstance(before, (list, tuple)) and after in before or after == before:
            # from_set=False: `pending` must not change while it is iterated.
            delkey(key, from_set=False)
        

# Map BibTex to Info object
# NOTE(review): keys wrapped in "<...>" look like directives for the rule
# engine that consumes this dict (not visible in this file); the remaining
# keys pair a BibTeX field with the Info key(s) it feeds -- confirm the exact
# semantics against the rules module.
BIBTEX_TO_INFO = {
    "<before>": [
        ("place1", ""),
        # Year as int. When the year text contains "[in press]" only its first
        # four characters are used; a missing year becomes 0.
        ("year", lambda x: int(
            (x["year"][:4] or 0) if "[in press]" in x.get("year", "")
            else x.get("year", 0)
        )),
    ],
    "<middle>": [
        # Derived values, computed from the already converted "new" object.
        ("place", lambda old, new: _place_value(new["place1"])),
        ("display", lambda old, new: last_name_first_author(new["authors"])),
        ("pyref", lambda old, new: info_to_pyref(new)),
    ],
    "<after>": [
        # Strip BibTeX braces from the title.
        ("name", lambda old, new, current: new["name"].replace("{", "").replace("}", ""))
    ],
    "title": ["name"],
    "author": ["authors"],
    "address": ["local"],
    "publisher": ["organization"],
    "pages": ["pp"],
    "ENTRYTYPE": ["entrytype"],
    "journal": ["place1"],
    "booktitle": ["place1"],
    "year": [
        # "in press" note when the year text is marked as "[in press]".
        ("note", lambda x: "in press" if "[in press]" in x.get("year", "") else None)
    ],
    "<article>": [
        # Values read from the article object (its attrs entries are indexed
        # with [0]).
        ("excerpt", lambda article, new: article.attrs["excerpt"][0]),
        ("cluster_id", lambda article, new: article.attrs["cluster_id"][0]),
        ("scholar", lambda article, new: article.attrs["url_citations"][0]),
        ("div", lambda article, new: article.div),
    ],
    "<set_ignore_keys>": {"excerpt", "div", "pyref", "display", "place", "_work_type", "_ref"},
    "<set_ignore_but_show>": {"place1", "year"},
    "<set_always_show>": {"name", "authors", "entrytype", "place1", "year"},
    "<set_order>": ["name", "authors", "entrytype", "place1", "year"],
    "<scholar_ok>": "scholar_ok",
    # The list literal only exists to run the three conditional setattr calls
    # in order; every element evaluates to None.
    "<set_before>": lambda work, info: [
        setattr(work, "entrytype", work.place.type)
            if not hasattr(work, "entrytype")
            and hasattr(work, "place") else 
            None,
        setattr(info, "place1", info["place"])
            if "place" in info
            and not "place1" in info else
            None,
        setattr(work, "place1", "{} ({})".format(work.place.name, work.place.acronym))
            if hasattr(work, "place") else
            None,
    ],
    "<pos_diff>": _pos_diff
}


# Map BibTex to Info object. Set _work_type=Work
# Built from BIBTEX_TO_INFO through ModifyRules, appending one "<before>" rule.
BIBTEX_TO_INFO_WITH_TYPE = (
    ModifyRules(BIBTEX_TO_INFO, "with_type")
    .append("<before>", ("_work_type", "Work"))
).rules


# Map BibTex to Info object. Ignore ID attribute
# Built from BIBTEX_TO_INFO through ModifyRules, adding "ID" to
# "<set_ignore_keys>".
BIBTEX_TO_INFO_IGNORE_SET_ID = (
    ModifyRules(BIBTEX_TO_INFO, "ignore_set")
    .add("<set_ignore_keys>", "ID")
).rules


# Convert Article info to Info object
#   If the work exists, it should populate the attribute _nwork (use find_work_by_info)
ARTICLE_TO_INFO = {
    "<after>": [
        # The list literals below only exist to run the setitem calls in
        # order; the branch is selected by new["_work_type"].
        (None, lambda old, new, current:
            # Site: build the Site(...) source text and the Site object.
            [
                setitem(new, "pyref", 'Site("{name}", "{url}")'.format(**new)),
                setitem(new, "_nwork", Site(new["name"], new["url"]))
            ] if new.get("_work_type") == "Site" else
            # Ref: load the referenced work by variable name and copy its
            # name and place name.
            [
                setitem(new, "_nwork", work_by_varname(new["pyref"])),
                setitem(new, "name", new["_nwork"].name),
                setitem(new, "place", new["_nwork"].place.name),
            ] if new.get("_work_type") == "Ref" else 
            # Anything else: fill place/display/pyref only when missing, then
            # look for an existing work.
            [
                setitem(new, "place", _place_value(new["place1"])) if "place" not in new else None,
                setitem(new, "display", last_name_first_author(new["authors"])) if "display" not in new else None,
                setitem(new, "pyref", info_to_pyref(new)) if "pyref" not in new else None,
                setitem(new, "_nwork", find_work_by_info(new, set())),
            ]
        
        )
    ],
    "div": [None],
    "citation_id": [None],
    "<article>": [
        # Every branch stores the citation id (or "") as "_ref" on the article.
        (None, lambda article, new: 
            [
                setitem(article, "_show_site", True),
                setitem(article, "_ref", article.get("citation_id", "")),
            ] if new.get("_work_type") == "Site" else
            [
                setitem(article, "name", new.get("name")),
                setitem(article, "place", new.get("place")),
                setitem(article, "_ref", article.get("citation_id", "")),
            ] if new.get("_work_type") == "Ref" else [
                setitem(article, "_ref", article.get("citation_id", "")),
            ]
           
        ),
    ]
}


# Map Info object to Insert Code
INFO_TO_INSERT = {
    # Defaults for every placeholder used by the "<result>" template.
    "<before>": [
        ("work_type", "Work"), ("due", ""), ("related", ""),
        ("display", ""), ("authors", ""), ("the_place", ""), ("other", ""),
    ],
    "<middle>": [
        ("the_place", sequence([
            var_item("place", obj="new"),
            str_item("place1", obj="new"), 
        ])),
        # One "key=value," line per key left in "current"; iterating a copy
        # because consume is called on "current" during the loop.
        ("other", lambda old, new, current: [
            "{}={!r},".format(key, consume(current, key)) # .replace('"', r'\"')
            for key in copy(current)
        ]),
        # Joined with the 12-space indentation that "{attributes}" has in the
        # "<result>" template.
        ("attributes", lambda old, new: "\n            ".join(remove_empty([
            new["due"], new["related"], new["display"],
            new["authors"], new["the_place"]
        ] + new["other"]))),
    ],
    # The template is formatted with "new" first and dedented afterwards.
    "<result>": lambda old, new, current: textwrap.dedent("""
        {pyref} = DB({work_type}(
            {year}, "{name}",
            {attributes}
        ))""".format(**new)),
    "_work_type": ["work_type"],
    "year": ["year"],
    "name": ["name"],
    "authors": [("authors", str_item("authors"))],
    "display": [("display", str_item("display"))],
    "pyref": ["pyref"],
    "place": ["place"],
    "place1": ["place1"],
    "may_be_related_to": [("related", str_list("may_be_related_to"))],
    "due": [("due", str_item("due"))] ,
    "excerpt": [None],
    "_ref": [None],
}


# Modify Info object for finding work
#   <skip> and <ignore> only exist for this
FIND_INFO_WORK = {
    "<skip>": [
        ("_work_type", "Site"),
    ],
    "<ignore>": ["_year", "_work", "_key"],
    "<before>": [
        # Fixed place name for theses, technical reports and books; None for
        # any other (or missing) entrytype.
        ("place", lambda old: 
            None if "entrytype" not in old else
            "Thesis" if old["entrytype"] in ("phdthesis", "mastersthesis") else
            "TechReport" if old["entrytype"] == "techreport" else
            "Book" if old["entrytype"] == "book" else
            None
        ), 
        # When the info carries an "ID", load the work with that variable name
        # and keep the ID itself as "_key".
        ("_work", lambda old: work_by_varname(old["ID"]) if "ID" in old else None),
        ("_key", lambda old: old["ID"] if "ID" in old else None),
    ],
    "year": ["year", "_year"],
    "school": ["local"],
}


# Convert Work into BibTex dict
#   The "new" object starts with three parameters
#     _name: override metakey name
#     _homogeneize: homogenize the LaTeX encoding of the result
#     _acronym: use acronym instead of full place text
#   The result does not need these attributes
def _work_to_bibtex_middle(work, new, current):
    """Fill the BibTeX place field (booktitle/journal) of ``new`` from ``work``

    When DEBUG_FIELDS is set, it first prints a "[DEBUG WARNING]" line for
    every attribute of ``work`` that matches neither the "<use>" nor the
    "<ignore>" patterns of WORK_TO_BIBTEX.

    The target field depends on ``new["ENTRYTYPE"]``: "booktitle" for
    incollection, inproceedings and misc; "journal" for article. Any other
    entry type (including a missing one) gets no place field.

    The value is ``work.place`` (its acronym when ``new["_acronym"]`` is
    truthy, the place is a Conference and the acronym does not start with
    "<"), falling back to ``work.place1``.

    ``current`` is not used. Modifies ``new`` in place and returns None.
    """
    if DEBUG_FIELDS:
        use = callable_get(WORK_TO_BIBTEX, "<use>", [])
        ignore = callable_get(WORK_TO_BIBTEX, "<ignore>", [])
        for key in dir(work):
            if not match_any(key, use) and not match_any(key, ignore):
                print("[DEBUG WARNING]", work.display, work.year, key)

    # Only these entry types carry a place. Using .get keeps other valid
    # BibTeX types (inbook, unpublished, ...) from raising KeyError.
    place_key_by_type = {
        "incollection": "booktitle",
        "inproceedings": "booktitle",
        "misc": "booktitle",
        "article": "journal",
    }
    new_place_key = place_key_by_type.get(new.get("ENTRYTYPE", ""))
    if not new_place_key:
        return

    if hasattr(work, "place"):
        conference_has_acronym = (
            new.get("_acronym")
            and work.place.type == "Conference"
            and not work.place.acronym.startswith("<")
        )
        if conference_has_acronym:
            new[new_place_key] = str(work.place.acronym)
        else:
            new[new_place_key] = str(work.place)
    elif hasattr(work, "place1"):
        new[new_place_key] = str(work.place1)

WORK_TO_BIBTEX = {
    "<before>": [
        # Entry ID, in priority order: the "_name" override, the work metakey,
        # or the first whitespace-separated token of the authors ("a" when
        # there is none) followed by the year.
        ("ID", lambda work, new, current: (
            new["_name"] if new.get("_name", None) else
            work.metakey if hasattr(work, "metakey") else
            (lambda split: (split[0] if split else "a") + str(work.year))(work.authors.split())
        )),
    ],
    "<middle>": [
        (None, _work_to_bibtex_middle),
    ],
    # Callables, so that changes to BIBTEX_IGNORE_FIELDS / WORK_FIELDS made
    # after import are picked up when the rules are read.
    "<ignore>": lambda: BIBTEX_IGNORE_FIELDS + ["place", "place1"],
    "<use>": lambda: WORK_FIELDS,
    # The list literal runs the three consume calls in order and keeps only
    # the last element: the dict, homogenized when "_homogeneize" was set.
    "<result>": lambda work, new, current: (
        [
            consume(new, "_name"),
            consume(new, "_acronym"),
            homogenize_latex_encoding(new) if consume(new, "_homogeneize") else new,
        ][-1]
    ), 
    "name": ["title"],
    "authors": ["author"],
    "local": ["address"],
    "organization": ["publisher"],
    "pp": ["pages"],
    "entrytype": ["ENTRYTYPE"],
}


### Snowballing Forms

def convert_citation_text_lines_to_info(text):
    """Convert lines of format '[N] author name place other year' to Info object
    If it is an invalid format, it should return "Incomplete".

    Default: Recognizes 3 patterns (one element per line):
      - Citation: [N] > varname
      - Site: [N] SiteName http://
      - Work: [N] author name place other year
        - 'other' can be either lines with values, or use the format key=value

    The first line is always stored as "citation_id". Lines of 'other' that
    contain "=" are stored under the text before the first "="; the remaining
    ones are stored as "other<position>", counting from 1.
    """
    lines = text.strip().split("\n")
    last = lines[-1].strip()
    info = {
        "citation_id": lines[0].strip(),
    }

    if last.startswith(">") and len(lines) >= 2:
        # [N] > varname
        # Slice the stripped line: the raw one may be indented before ">".
        info["pyref"] = last[1:].strip()
        info["_work_type"] = "Ref"
        other = lines[1:-1]
    elif last.startswith("http") and len(lines) >= 3:
        # [N] WebName http://...
        info["name"] = lines[1].strip()
        info["url"] = last
        info["_work_type"] = "Site"
        other = lines[2:-1]
    elif len(lines) >= 5 and last.isdecimal():
        # [N] author name place other year
        # isdecimal (not isnumeric): only accept what int() can parse.
        info["authors"] = lines[1].strip()
        info["name"] = lines[2].strip()
        info["place1"] = lines[3].strip()
        info["year"] = int(last)
        info["_work_type"] = "Work"
        other = lines[4:-1]
    else:
        return "Incomplete"

    for num, line in enumerate(other, 1):
        line = line.strip()
        key, separator, value = line.partition("=")
        if separator:
            info[key] = value
        else:
            info["other{}".format(num)] = line
    return info


def info_to_pyref(info):
    """Create pyref for info

    The pyref is "<display><year>" followed by the first lowercase letter for
    which ``work_by_varname`` finds no existing work.

    Doctest:

    .. doctest::

        >>> info_to_pyref({'display': 'pimentel', 'year': 2017})
        'pimentel2017a'
        >>> info_to_pyref({'display': 'pimentel', 'year': 2015})
        'pimentel2015b'
    """
    pyref = "{display}{year}".format(**info)
    for letter in ascii_lowercase:
        if not work_by_varname(pyref + letter):
            return pyref + letter
    # NOTE(review): every suffix from "a" to "z" is taken. The historical
    # behavior is to reuse "z", which collides with an existing work.
    return pyref + ascii_lowercase[-1]


def set_info_letter(info, letter):
    """Set letter of info object

    Replaces a trailing alphabetic character of ``info["pyref"]`` by
    ``letter`` (or appends it when the pyref does not end with a letter) and
    appends " <letter>" to ``info["display"]``. Modifies ``info`` in place.
    """
    pyref = info["pyref"]
    # Slice instead of index so that an empty pyref does not raise IndexError.
    if pyref[-1:].isalpha():
        pyref = pyref[:-1]
    info["pyref"] = pyref + letter
    info["display"] += " " + letter


# Insert form
FORM = {
    # List of widgets
    #   Each widget has at least 3 fields
    #     Type (text, dropdown, toggle, button)
    #     Label
    #     Variable
    #   Some widgets require extra fields
    #     text, dropdown, and toggle: 4th field indicates the value
    #     dropdown: 5th field is a list with options
    "widgets": [
        # Base
        [
            "dropdown", "Type", "work_type", DEFAULT_CLASS, 
            [tup[0] for tup in CLASSES]
        ],
        ["toggle", "File", "file_field", False],
        ["text", "Due", "due", ""],
        ["text", "Place", "place", ""],
        ["text", "Year", "year", ""],
        ["text", "Prefix Var", "prefix", ""],
        ["text", "PDFPage", "pdfpage", ""],
        ["text", "Related", "may_be_related_to", None],
        ["text", "Display", "display", None],
        ["text", "Link", "link", None],
        # Buttons
        ["button", "Ok", "_b1"],
        
    ],
    # List of events
    #   Each event has 3 fields
    #     Widget variable
    #     Listener (observe, click)
    #     Action. It can be:
    #       A dictionary indicating which values to change
    #       A list with the action (first item is a str) and its parameters (other actions)
    #       A list of actions to run in sequence
    "events": [
        # Base
        # Filling "due" on a plain Work turns it into WorkUnrelated; clearing
        # it on a WorkUnrelated turns it back into Work.
        ["due", "observe", [
            ["if",
                ["and",
                    ["!=", ":due", ""],
                    ["==", ":work_type", "Work"]
                ],
                {
                    "work_type": "WorkUnrelated",
                },
                ["if",
                    ["and",
                        ["==", ":due", ""],
                        ["==", ":work_type", "WorkUnrelated"]
                    ],
                    {
                        "work_type": "Work"
                    },
                    []

                ]
            ]

        ]],
        # Place "Lang" on a plain Work turns it into WorkLang.
        ["place", "observe", [
            ["if",
                ["and",
                    ["==", ":place", "Lang"],
                    ["==", ":work_type", "Work"]
                ],
                {
                    "work_type": "WorkLang"
                },
                []
            ]
        ]],
        # Buttons
        ["_b1", "click", [
            {
                "work_type": "WorkOk",
                "file_field": True,
            },
            ["reload"],
        ]],
    ],
    # Exhibition of the widgets.
    #   Each list indicates a row with other widgets
    #   I suggest using no more than 4 buttons in a row 
    #   and no more than 2 inputs in a row
    # NOTE(review): "summary" is not declared in "widgets" above, and the
    # declared "pdfpage" widget is not placed in any row -- confirm whether
    # "summary" should be "pdfpage".
    "order": [
        ["_b1"],
        ["work_type", "file_field"],
        ["due", "place"],
        ["year", "prefix"],
        ["may_be_related_to", "display"],
        ["summary", "link"],
    ],
    # Show operation
    #   Use the same "DSL" as the events to update info object.
    "show": [
        ["if", ["==", ".place", "Lang"],
            {"work_type": "WorkLang"},
            []
        ],
        ["update_info", "due", ":due", None, ""],
        ["update_info", "place", ":place", None, ""],
        ["update_info", "_work_type", ":work_type", None, "Work"],
        ["if", 
            ["or",
                ["update_info", "year", ":year", None, ""],
                ["update_info", "display", ":prefix", None, ""],
            ],
            ["pyref"],
            []
        ],
        ["update_info", "file", ":file_field", ["+", ".pyref", r"\.pdf"], False],
        ["update_info", "file", ":pdfpage", ["+", ".file", "#page=", ":pdfpage"], ""],
        ["update_info", "may_be_related_to", ":may_be_related_to", None, ""],
        ["update_info", "display", ":display", None, ""],
        ["update_info", "link", ":link", None, ""],
    ]
}


### Checks

def check_insertion(nwork, info, citation_var, citation_file, should_add, ref=""):
    """Check info validity after executing snowballing.create_info_code
    Return dictionary of warning fields

    Default: returns place1 if place is not defined and always returns the pdf name

    The pdf name is "<pyref>.pdf". place1 is not reported for Site and Ref
    infos. Only ``info`` is read by this default implementation; the other
    arguments are accepted for custom implementations.
    """
    result = {"pdf": "{}.pdf".format(info["pyref"])}
    if "place" not in info and info.get("_work_type") not in ("Site", "Ref"):
        result["place1"] = info["place1"]
    return result


def check_load(work, varname, warning=lambda x: print(x)):
    """Check conditions on load

    Default: reports (through the ``warning`` callable, which prints by
    default) a work without the attribute scholar_id and a work whose place
    is missing or None. ``varname`` is only used in the messages.
    """
    if not hasattr(work, "scholar_id"):
        warning("[Warning] Work {} does not have scholar_id".format(varname))
    if getattr(work, "place", None) is None:
        warning("[Error] Work {} does not have place".format(varname))


### Snowballing display

def display_article(article):
    """Display article in widget

    Default: when the article has a "div", returns two HTML objects: a style
    sheet for the "gs_or_svg" class and the repr of the div. Otherwise,
    returns a list with the article name only.
    """
    if "div" in article:
        return [
            HTML("""
                <style>
                .gs_or_svg {
                    position: relative;
                    width: 29px;
                    height: 16px;
                    vertical-align: text-bottom;
                    fill: none;
                    stroke: #1a0dab;
                }
                </style>
            """),
            HTML(repr(article["div"]))
        ]
    return [
        article["name"]
    ]


### Query Scholar

def query_str(work):
    """Return string to query work on scholar

    Default: the work name and authors, separated by a space
    """
    return work.name + " " + work.authors


### Work Attributes

def work_post_init(work):
    """Instructions to execute after Work __init__

    Default: set display to title if display is None (or not defined)
    """
    if getattr(work, "display", None) is None:
        work.display = work.name


def work_eq(first, second):
    """Compare work

    Default: compare place, title and year

    A missing place is treated as None, so two works without place can still
    be equal.
    """
    if getattr(first, "place", None) != getattr(second, "place", None):
        return False
    return (
        first.name == second.name
        and first.year == second.year
    )


def work_hash(work):
    """Uniquely identify work

    Default: use title and year
    """
    return hash((work.name, work.year))


def info_work_match(info, work):
    """Check if an Info object matches to a Work object

    Default: checks for exact matches by cluster_id, scholar_id, (year, name, authors) of aliases
      and similar titles
      It uses matches on places and years to reduce the similarity requirements for matching titles

    The title similarity must be greater than 0.9. The requirement is raised
    by 0.1 when the info year is 0 and lowered by 0.1 when info and work have
    the same place name.
    """
    # An id only counts when the info actually has one. An empty id is
    # contained in every string, and a missing attribute on the work must
    # never match.
    for id_key in ("cluster_id", "scholar_id"):
        info_id = info.get(id_key)
        if info_id and info_id in getattr(work, id_key, ()):
            return True

    for alias in get_work_aliases(work):
        condition = (
            alias[0] == info["year"]
            and alias[1] == info["name"]
            and (
                len(alias) > 2 and alias[2] == info["authors"]
                or getattr(work, "authors", None) == info["authors"]
            )
        )
        if condition:
            return True

    required = 0.9
    if info["year"] == 0:
        required += 0.1

    # The distinct "<ii>"/"<iw>" defaults keep two missing names from being
    # considered equal.
    same_place = (
        "place" in info
        and hasattr(work, "place")
        and getattr(
            getattr(MODULES["places"], info["place"], None),
            "name", "<ii>"
        ) == getattr(
            getattr(work, "place", None),
            "name", "<iw>"
        )
    )
    if same_place:
        required -= 0.1

    return compare_str(str(getattr(work, "name")), info.get("name")) > required


def work_display(work):
    """Get work display

    Default: return the attribute display
    """
    return work.display



### Aliases

def get_work_aliases(work):
    """Get list of aliases from work

    Default: the tool uses the attributes "alias" and "aliases" to represent aliases

    A single "alias" is wrapped in a list and takes precedence over
    "aliases". Returns an empty list when the work has neither attribute.
    """
    if hasattr(work, "alias"):
        return [work.alias]
    if hasattr(work, "aliases"):
        return work.aliases
    return []


def get_alias_year(work, alias):
    """Get year from alias

    Default: return first element of tuple/list that represents the alias

    ``work`` is not used by this default implementation.
    """
    return alias[0]


### Graph Attributes

def graph_place_text(work):
    """Get place text for graph

    Default: return place acronym, or None when the work has no place
    """
    place = getattr(work, "place", None)
    if place is not None:
        return place.acronym
    return None


def graph_place_tooltip(work):
    """Generate place tooltip

    Default: tooltip with all information from Place object

    The text is produced by ``generate_title`` for ``work.place``, with an
    empty ``prepend``.
    """
    return generate_title(work.place, prepend="")


def work_link(work):
    """Create hyperlink based on work attributes.
    The attribute _link is set by the graph creator and it specifies the priority order of link types

    Default: uses attributes file, link, or scholar to create link

    Returns "files/<file>" for a file, the link or scholar attribute as they
    are, or None when no link type in ``work._link`` applies.
    """
    for link_type in work._link:
        # getattr: a work without the attribute file must fall through to the
        # next link type, as it already does for link and scholar.
        if link_type == "file" and getattr(work, "file", None):
            return "files/" + work.file
        if link_type == "link" and getattr(work, "link", None):
            return work.link
        if link_type == "scholar" and hasattr(work, "scholar"):
            return work.scholar
    return None


def work_tooltip(work):
    """Generate work tooltip

    Default: tooltip with paper title and authors, followed by a blank line
    and the BibTeX of the work
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably to avoid a circular import with .operations
    # -- confirm.
    from .operations import work_to_bibtex
    return (
        "{}\n{}".format(work.name, work.authors)
        + "\n\n" + work_to_bibtex(work)
    )


def citation_tooltip(citation):
    """Generate citation tooltip

    Default: "<work metakey> -> <citation metakey>" followed by the
    ``generate_title`` text of the citation, ignoring the attributes that
    match "_.*", "work" and "citation"
    """
    return (
        "{0} -> {1}".format(citation.work.metakey, citation.citation.metakey)
        + generate_title(citation, ignore={"_.*", "work", "citation"})
    )


### Approaches

# NOTE(review): neither constant is used in this file. Presumably the prefix
# of approach attributes that force a value and the name of the category for
# related approaches -- confirm against their users.
APPROACH_FORCE_PREFIX = "force_"
APPROACH_RELATED_CATEGORY = "Related"

def approach_ids_from_work(approach, works):
    """Generate the IDs of the snowball/ok works of an approach

    Default: for each work in ``approach.work`` that is a key of ``works``
    and has "snowball" or "ok" in its category, yields ``works[work]["ID"]``
    """
    for work in approach.work:
        if work in works and ("snowball" in work.category or "ok" in work.category):
            yield works[work]["ID"]

def approach_display(approach):
    """Get approach display

    Default: the ``work_display`` of the approach with every pair of
    consecutive spaces removed
    """
    return work_display(approach).replace("  ", "")

### Database

# Module setting
# Every entry starts as None. info_work_match reads MODULES["places"], so it
# must be replaced before that function is used.
# NOTE(review): presumably filled by the database __init__ -- confirm.
MODULES = {
    "places": None,
    "work": None,
    "citations": None,
    "groups": None,
}

# Map of Work attributes used by the tool
#   I advise against changing this variable since I may have forgotten to replace an attribute in a function
ATTR = {
    # Work
    "category": "category",
    "metakey": "metakey", 
    "pyref": "pyref", 
    "year": "year", 
    "name": "name",
    "site_link": "link",
    "email_authors": "authors",
    "citation_file": "citation_file",


    # Approach
    "approach_dont_cite": "dont_cite",
    "approach_work": "work",
}


# Similar to the ATTR variable, but provides a direct map from existing keys from scholar
SCHOLAR_MAP = {
    "scholar_id": "scholar_id",
    "cluster_id": "cluster_id",
    "scholar": "scholar",
    "scholar_ok": "scholar_ok",
}

# Hand ATTR to define_cvar (imported from .collection_helpers); its effect is
# not visible in this file.
define_cvar(ATTR)