Module CMUDaaN.graphs
Expand source code
from utils import WordDict
from collections.abc import Iterable
import random
import nltk
import os
from itertools import combinations, chain
import networkx as nx
import matplotlib.pyplot as plt
# load wordnet
path = os.path.abspath("")
if path not in nltk.data.path:
nltk.data.path.append(path)
from nltk.corpus import wordnet as wn
# load the worddict
wd = WordDict()
class Random:
def __init__(self, n:int = 10, seq:str = None, startswith:bool = None):
self.n = int(n) if isinstance(n, int) else None
self.seq = seq
self.startswith = startswith
def __str__(self):
return "Random"
def _find_startswith(self, keys: list[str]) -> Iterable[str]:
"""Find all words that start with self.seq
Args:
keys (list[str]): Words to be filtered
Returns:
Iterable[str]: new set of words based on criteria
"""
return set(filter(lambda x: x.startswith(self.seq), keys))
def _find_endswith(self, keys: list[str]) -> Iterable[str]:
"""Final all words that end with self.seq
Args:
keys (list[str]): Words to be filtered
Returns:
Iterable[str]: new set of wrods based on critera
"""
return set(filter(lambda x: x.endswith(self.seq), keys))
def _find_combinations_driver(self, stream_keys: list):
# if n and seq are None
if not self.n and not self.seq:
final_keys = stream_keys
# if n is not None and seq is None
elif self.n and not self.seq:
final_keys = random.sample(stream_keys, self.n)
# if n is None and seq is not None
elif not self.n and self.seq:
if self.startswith:
final_keys = self._find_startswith(stream_keys)
else:
final_keys = self._find_endswith(stream_keys)
# if n and seq are not None
else:
if self.startswith:
final_keys = random.sample(self._find_startswith(stream_keys), self.n)
else:
final_keys = random.sample(self._find_endswith(stream_keys), self.n)
return final_keys
@property
def _find_combinations(self):
stream_keys = list(wd.stream.keys())
final_keys = self._find_combinations_driver(stream_keys)
return combinations(final_keys, 2)
class WordList:
def __init__(self, wordlist:list[str], n_per_community:int = 10, communities:bool = None):
self.wordlist = wordlist
self.n_per_community = int(n_per_community) if n_per_community else None
self.communities = communities
def __str__(self):
return "WordList"
def _ensure_word_in_sample(self, word: str, stream_keys: list) -> list:
return combinations([word] + random.sample(stream_keys, self.n), 2)
@property
def _find_combinations(self) -> Iterable[tuple[str, str]]:
"""Find combinations based on class criteria
Returns:
Iterable[tuple[str, str]]: generated combinations of words
"""
# if wordlist is not None and commuinities is None
if self.wordlist and not self.communities:
return combinations(self.wordlist, 2)
# if wordlist and communities is not None
if self.wordlist and self.communities:
stream_keys = list(wd.stream.keys())
wordlist_comb = list(combinations(self.wordlist, 2)) # n*(n-1)/2
new_comb = list(chain.from_iterable([self._ensure_word_in_sample(w, stream_keys) for w in self.wordlist])) #
return set(wordlist_comb + new_comb)
class Synonym:
def __init__(self, word: str, communities: bool = None, n_synonyms: int = None, n:int = 10):
self.communities = communities
self.word = word
self.n_synonyms = int(n_synonyms) if isinstance(n_synonyms, int) else None
self.n = int(n) if isinstance(n, int) else None
def __str__(self):
return "Synonym"
@property
def _synonyms(self):
syn = wn.synsets(self.word)
if self.n_synonyms is not None:
if self.n_synonyms < len(syn):
syn = random.sample(syn, self.n_synonyms)
syn = set(map(lambda x: str(x.name().split('.')[0]), syn))
return syn
def _ensure_word_in_sample(self, word: str, stream_keys: list) -> list:
return combinations([word] + random.sample(stream_keys, self.n), 2)
@property
def _find_combinations(self) -> Iterable[tuple[str, str]]:
"""Find combinations based on class criteria
Returns:
Iterable[tuple[str, str]]: generated combinations of words
"""
# if wordlist is not None and commuinities is None
if not self.communities:
return combinations(self._synonyms, 2)
syns = self._synonyms
stream_keys = list(wd.stream.keys())
wordlist_comb = list(combinations(syns, 2)) # n*(n-1)/2
new_comb = list(chain.from_iterable([self._ensure_word_in_sample(w, stream_keys) for w in syns])) #
return set(wordlist_comb + new_comb)
Classes
class Random (n: int = 10, seq: str = None, startswith: bool = None)
-
Expand source code
class Random: def __init__(self, n:int = 10, seq:str = None, startswith:bool = None): self.n = int(n) if isinstance(n, int) else None self.seq = seq self.startswith = startswith def __str__(self): return "Random" def _find_startswith(self, keys: list[str]) -> Iterable[str]: """Find all words that start with self.seq Args: keys (list[str]): Words to be filtered Returns: Iterable[str]: new set of words based on criteria """ return set(filter(lambda x: x.startswith(self.seq), keys)) def _find_endswith(self, keys: list[str]) -> Iterable[str]: """Final all words that end with self.seq Args: keys (list[str]): Words to be filtered Returns: Iterable[str]: new set of wrods based on critera """ return set(filter(lambda x: x.endswith(self.seq), keys)) def _find_combinations_driver(self, stream_keys: list): # if n and seq are None if not self.n and not self.seq: final_keys = stream_keys # if n is not None and seq is None elif self.n and not self.seq: final_keys = random.sample(stream_keys, self.n) # if n is None and seq is not None elif not self.n and self.seq: if self.startswith: final_keys = self._find_startswith(stream_keys) else: final_keys = self._find_endswith(stream_keys) # if n and seq are not None else: if self.startswith: final_keys = random.sample(self._find_startswith(stream_keys), self.n) else: final_keys = random.sample(self._find_endswith(stream_keys), self.n) return final_keys @property def _find_combinations(self): stream_keys = list(wd.stream.keys()) final_keys = self._find_combinations_driver(stream_keys) return combinations(final_keys, 2)
class Synonym (word: str, communities: bool = None, n_synonyms: int = None, n: int = 10)
-
Expand source code
class Synonym: def __init__(self, word: str, communities: bool = None, n_synonyms: int = None, n:int = 10): self.communities = communities self.word = word self.n_synonyms = int(n_synonyms) if isinstance(n_synonyms, int) else None self.n = int(n) if isinstance(n, int) else None def __str__(self): return "Synonym" @property def _synonyms(self): syn = wn.synsets(self.word) if self.n_synonyms is not None: if self.n_synonyms < len(syn): syn = random.sample(syn, self.n_synonyms) syn = set(map(lambda x: str(x.name().split('.')[0]), syn)) return syn def _ensure_word_in_sample(self, word: str, stream_keys: list) -> list: return combinations([word] + random.sample(stream_keys, self.n), 2) @property def _find_combinations(self) -> Iterable[tuple[str, str]]: """Find combinations based on class criteria Returns: Iterable[tuple[str, str]]: generated combinations of words """ # if wordlist is not None and commuinities is None if not self.communities: return combinations(self._synonyms, 2) syns = self._synonyms stream_keys = list(wd.stream.keys()) wordlist_comb = list(combinations(syns, 2)) # n*(n-1)/2 new_comb = list(chain.from_iterable([self._ensure_word_in_sample(w, stream_keys) for w in syns])) # return set(wordlist_comb + new_comb)
class WordList (wordlist: list[str], n_per_community: int = 10, communities: bool = None)
-
Expand source code
class WordList: def __init__(self, wordlist:list[str], n_per_community:int = 10, communities:bool = None): self.wordlist = wordlist self.n_per_community = int(n_per_community) if n_per_community else None self.communities = communities def __str__(self): return "WordList" def _ensure_word_in_sample(self, word: str, stream_keys: list) -> list: return combinations([word] + random.sample(stream_keys, self.n), 2) @property def _find_combinations(self) -> Iterable[tuple[str, str]]: """Find combinations based on class criteria Returns: Iterable[tuple[str, str]]: generated combinations of words """ # if wordlist is not None and commuinities is None if self.wordlist and not self.communities: return combinations(self.wordlist, 2) # if wordlist and communities is not None if self.wordlist and self.communities: stream_keys = list(wd.stream.keys()) wordlist_comb = list(combinations(self.wordlist, 2)) # n*(n-1)/2 new_comb = list(chain.from_iterable([self._ensure_word_in_sample(w, stream_keys) for w in self.wordlist])) # return set(wordlist_comb + new_comb)