importjsonimportwarningsfrompathlibimportPath# load in baseline keywords
def load_baseline_keywords():
    """Load the baseline keywords bundled next to this module.

    Reads ``data/baseline_keywords.json`` (resolved relative to this file),
    which is expected to map category names to lists of keyword strings, and
    flattens every category into one list of lowercased keywords. The list
    can be used for evaluating resumes against a standard set of keywords.

    Returns
    -------
    list of str
        Every keyword from every category, lowercased, in the order the
        categories and keywords appear in the file. Keywords that occur in
        more than one category are *not* deduplicated.

    Raises
    ------
    FileNotFoundError
        If the JSON file containing the baseline keywords cannot be found.
    json.JSONDecodeError
        If the JSON file is not properly formatted.
    """
    data_path = Path(__file__).parent / "data" / "baseline_keywords.json"

    # Decode explicitly as UTF-8 rather than relying on the platform's locale
    # encoding, so non-ASCII keywords load identically on every system.
    with open(data_path, "r", encoding="utf-8") as f:
        keywords_dict = json.load(f)

    # flatten all categories into a single list
    return [
        keyword.lower()
        for category in keywords_dict.values()
        for keyword in category
    ]
def evaluate_keywords(cleaned_text, keywords=None, use_only_supplied_keywords=False):
    """
    Report which keywords are missing from a resume's text.

    The resume text is compared against user-supplied keywords, the baseline
    keywords returned by ``load_baseline_keywords()``, or both. Matching is a
    case-insensitive *substring* test, so a keyword such as ``"sql"`` counts
    as present when the text contains ``"mysql"``.

    Parameters
    ----------
    cleaned_text : str
        The cleaned text content of the resume.
    keywords : iterable of str, optional
        Keywords to look for in the resume. Any iterable of strings is
        accepted (it is copied into a list up front, so generators work).
        A single bare string is rejected, because it would otherwise be
        treated as a sequence of one-character keywords. If omitted, only
        the baseline keywords are used.
    use_only_supplied_keywords : bool, optional
        If True, check only ``keywords`` and do not load the baseline
        keywords. If False (the default), the baseline keywords are checked
        in addition to any supplied ones. When True and no keywords are
        supplied, a warning is issued and an empty list is returned.

    Returns
    -------
    list of str
        The lowercased keywords that do not appear in ``cleaned_text``,
        without duplicates. The order is deterministic: supplied keywords
        first (in the order given), followed by baseline keywords (in the
        order ``load_baseline_keywords()`` returns them).

    Raises
    ------
    TypeError
        If ``cleaned_text`` is not a string, if ``keywords`` is a single
        string or is not iterable, or if any keyword is not a string.

    Warns
    -----
    UserWarning
        If ``cleaned_text`` is empty or whitespace-only, or if
        ``use_only_supplied_keywords`` is True but no keywords were given.

    Examples
    --------
    >>> evaluate_keywords(
    ...     "software development project management agile methodologies",
    ...     ["software", "agile", "teamwork"],
    ...     use_only_supplied_keywords=True,
    ... )
    ['teamwork']

    With the default settings the baseline keywords are checked as well, so
    the result depends on the bundled baseline file (output below is
    illustrative only):

    >>> evaluate_keywords("data analysis machine learning")  # doctest: +SKIP
    ['teamwork', 'communication']
    """
    # input validation: verify text and keywords are strings
    if not isinstance(cleaned_text, str):
        raise TypeError("cleaned_text must be a string")

    if keywords is not None:
        # A bare string is iterable and every character is a str, so it would
        # slip through the per-item check and be split into characters.
        if isinstance(keywords, str):
            raise TypeError("keywords must be a list of strings, not a single string")
        # Materialise once so a one-shot iterable (e.g. a generator) is not
        # exhausted by validation before it is actually used below.
        keywords = list(keywords)
        if not all(isinstance(k, str) for k in keywords):
            raise TypeError("All keywords must be strings")

    # Check for empty text and warn user
    if not cleaned_text.strip():
        warnings.warn(
            "The provided resume text is an empty string. "
            "Returning all baseline keywords as missing.",
            UserWarning,
            stacklevel=2,  # attribute the warning to the caller's line
        )

    # Warn if user wants to use only supplied keywords but provides none
    if use_only_supplied_keywords and not keywords:
        warnings.warn(
            "No keywords provided while use_only_supplied_keywords=True. "
            "Returning empty list.",
            UserWarning,
            stacklevel=2,
        )

    # convert text to lowercase for case-insensitive matching
    cleaned_text = cleaned_text.lower()

    # Collect candidates in a stable order: supplied keywords first, then
    # the baseline set if requested.
    candidates = [k.lower() for k in keywords] if keywords is not None else []
    if not use_only_supplied_keywords:
        candidates.extend(load_baseline_keywords())

    # dict.fromkeys removes duplicates while keeping first-seen order, which
    # keeps the result reproducible across runs (iterating a set of strings
    # is not, because of hash randomisation). An empty candidate list simply
    # yields an empty result.
    return [
        keyword
        for keyword in dict.fromkeys(candidates)
        if keyword not in cleaned_text
    ]