# -*- coding: utf-8 -*-
# Contributors:
# Paco Mora Caselles <pacomoracaselles@gmail.com>
"""Tests of the functionality contained in the file 'algorithms/bsd.py'.
"""
from os import remove
from bitarray import bitarray
from pandas import DataFrame
from subgroups.algorithms.subgroup_sets.bsd import BSD
from subgroups.core.operator import Operator
from subgroups.core.pattern import Pattern
from subgroups.core.selector import Selector
from subgroups.core.subgroup import Subgroup
from subgroups.quality_measures.wracc import WRAcc
from subgroups.quality_measures.wracc_optimistic_estimate_1 import WRAccOptimisticEstimate1
import unittest
[docs]
class TestBSD(unittest.TestCase):
[docs]
def test_BSD_init_method(self) -> None:
self.assertRaises(TypeError, BSD, 0, "hello")
self.assertRaises(TypeError, BSD, 0, WRAcc(),"hello")
self.assertRaises(TypeError, BSD, 0, WRAcc(),WRAccOptimisticEstimate1(),"hello")
self.assertRaises(TypeError, BSD, 0, WRAcc(),WRAccOptimisticEstimate1(),1,"hello")
BSD(0, WRAcc(),WRAccOptimisticEstimate1(),5,10,write_results_in_file=False)
BSD(0, WRAcc(),WRAccOptimisticEstimate1(),5,10,write_results_in_file=False,file_path="./asdasfha/fsdvm") # Path does not exist, but it is not checked
self.assertRaises(TypeError, BSD, 0 , WRAcc(), WRAccOptimisticEstimate1(), 5,10, write_results_in_file=True, file_path=24) # If 'file_path' is present, it must be of type str, no matter the flag.
self.assertRaises(TypeError, BSD, 0 , WRAcc(), WRAccOptimisticEstimate1(), 5,10, write_results_in_file=False, file_path=24) # If 'file_path' is present, it must be of type str, no matter the flag.
self.assertRaises(ValueError, BSD, 0 , WRAcc(), WRAccOptimisticEstimate1(), 5,10, write_results_in_file=True, file_path=None) # If 'write_results_in_file' is True, 'file_path' must not be None.
[docs]
def test_BSD_checkRel(self) -> None:
bsd = BSD(0, WRAcc(),WRAccOptimisticEstimate1(),5,10,write_results_in_file=False)
res = [(0,Pattern([Selector("att1",Operator.EQUAL,"A")]),bitarray("110000"))]
self.assertFalse(bsd._checkRel(res,bitarray("100"),bitarray("000"),0.,Pattern([])))
self.assertFalse(bsd._checkRel(res,bitarray("100"),bitarray("010"),0.,Pattern([])))
res = [(0,Pattern([Selector("att1",Operator.EQUAL,"A")]),bitarray("110010"))]
self.assertTrue(bsd._checkRel(res,bitarray("100"),bitarray("000"),0.,Pattern([])))
self.assertFalse(bsd._checkRel(res,bitarray("100"),bitarray("010"),0.,Pattern([])))
[docs]
def test_BSD_checkRelevancies(self) -> None:
bsd = BSD(0, WRAcc(),WRAccOptimisticEstimate1(),5,10,write_results_in_file=False)
res = [(0,Pattern([Selector("att1",Operator.EQUAL,"A")]),bitarray("110000")),(0,Pattern([Selector("att1",Operator.EQUAL,"B")]),bitarray("100000"))]
bsd._k_subgroups = res
bsd._checkRelevancies(bitarray("100"),bitarray("000"),Pattern([Selector("att1",Operator.EQUAL,"B")]))
self.assertEqual(bsd._k_subgroups,res) # The subgroup att1=B is irrelevant, but is not checked
bsd._checkRelevancies(bitarray("110"),bitarray("000"),Pattern([Selector("att1",Operator.EQUAL,"A")]))
self.assertNotEqual(bsd._k_subgroups,res) # The subgroup att1=B is irrelevant, and is checked
[docs]
def test_BSD_cardinality(self) -> None:
bsd = BSD(0, WRAcc(),WRAccOptimisticEstimate1(),5,10,write_results_in_file=False)
ba = bitarray("110000")
self.assertEqual(bsd._cardinality(ba),2)
ba = bitarray("110010")
self.assertEqual(bsd._cardinality(ba),3)
ba = bitarray("000000")
self.assertEqual(bsd._cardinality(ba),0)
[docs]
def test_BSD_logicalAnd(self) -> None:
bsd = BSD(0, WRAcc(),WRAccOptimisticEstimate1(),5,10,write_results_in_file=False)
ba1 = bitarray("110000")
ba2 = bitarray("100000")
self.assertEqual(bsd._logicalAnd(ba1,ba2),bitarray("100000"))
ba1 = bitarray("110010")
ba2 = bitarray("000001")
self.assertEqual(bsd._logicalAnd(ba1,ba2),bitarray("000000"))
[docs]
def test_BSD_fit1(self) -> None:
df = DataFrame({"a1" : ["a","b","c","c"], "a2" : ["q","q","s","q"], "a3" : ["f","g","h","k"], "class" : ["n","y","n","y"]})
target = ("class", "y")
# IMPORTANT: WRAcc quality measure is defined between -1 and 1.
bsd = BSD(0, WRAcc(),WRAccOptimisticEstimate1(),5,10,write_results_in_file=False)
bsd.fit(df, target)
self.assertEqual(bsd.selected_subgroups, 3)
self.assertEqual(bsd.unselected_subgroups, 50)
self.assertEqual(bsd.visited_subgroups, 21)
[docs]
def test_BSD_fit2(self) -> None:
df = DataFrame({"a1" : ["a","b","c","c"], "a2" : ["q","q","s","q"], "a3" : ["f","g","h","k"], "class" : ["n","y","n","y"]})
target = ("class", "y")
# IMPORTANT: WRAcc quality measure is defined between -1 and 1.
bsd = BSD(0, WRAcc(),WRAccOptimisticEstimate1(),2,10,write_results_in_file=False)
bsd.fit(df, target)
self.assertEqual(bsd.selected_subgroups, 2)
self.assertEqual(bsd.unselected_subgroups, 23)
self.assertEqual(bsd.visited_subgroups, 17)
[docs]
def test_BSD_fit3(self) -> None:
df = DataFrame({"a1" : ["a","b","c","c"], "a2" : ["q","q","s","q"], "a3" : ["f","g","h","k"], "class" : ["n","y","n","y"]})
target = ("class", "y")
# IMPORTANT: WRAcc quality measure is defined between -1 and 1.
bsd = BSD(200, WRAcc(),WRAccOptimisticEstimate1(),5,10,write_results_in_file=False)
bsd.fit(df, target)
self.assertEqual(bsd.selected_subgroups, 0)
self.assertEqual(bsd.unselected_subgroups, 0)
self.assertEqual(bsd.visited_subgroups, 0)
[docs]
def test_BSD_fit4(self) -> None:
df = DataFrame({'bread': {0: 'yes', 1: 'yes', 2: 'no', 3: 'yes', 4: 'yes', 5: 'yes', 6: 'yes'}, 'milk': {0: 'yes', 1: 'no', 2: 'yes', 3: 'yes', 4: 'yes', 5: 'yes', 6: 'yes'}, 'beer': {0: 'no', 1: 'yes', 2: 'yes', 3: 'yes', 4: 'no', 5: 'yes', 6: 'no'}, 'coke': {0: 'no', 1: 'no', 2: 'yes', 3: 'no', 4: 'yes', 5: 'no', 6: 'yes'}, 'diaper': {0: 'no', 1: 'yes', 2: 'yes', 3: 'yes', 4: 'yes', 5: 'yes', 6: 'yes'}})
target = ("diaper", "yes")
bsd = BSD(min_support=0,quality_measure=WRAcc(),optimistic_estimate = WRAccOptimisticEstimate1(), num_subgroups=8,max_depth=100 , write_results_in_file = True, file_path = "./results.txt" )
bsd.fit(df, target)
self.assertEqual(bsd.selected_subgroups, 4)
file_to_read = open("./results.txt", "r")
list_of_written_results = []
for line in file_to_read:
list_of_written_results.append(line)
list_of_subgroups = [Subgroup.generate_from_str(elem.split(";")[0][:-1]) for elem in list_of_written_results]
self.assertIn(Subgroup.generate_from_str("Description: [bread = 'yes'], Target: diaper = 'yes'"), list_of_subgroups)
self.assertIn(Subgroup.generate_from_str("Description: [milk = 'yes'], Target: diaper = 'yes'"), list_of_subgroups)
self.assertIn(Subgroup.generate_from_str("Description: [coke = 'yes'], Target: diaper = 'yes'"), list_of_subgroups)
self.assertIn(Subgroup.generate_from_str("Description: [beer = 'yes'], Target: diaper = 'yes'"), list_of_subgroups)
file_to_read.close()
remove("./results.txt")