Updated the way we handle reasoning again with new facts. Made it modular, using the same format as PyReason facts. Updated docs.

dyumanaditya · dyumanaditya · commit 383f3611f541 · 2025-02-20T11:29:29.000+01:00
diff --git a/docs/source/user_guide/8_advanced_usage.rst b/docs/source/user_guide/8_advanced_usage.rst
@@ -18,4 +18,10 @@ Reasoning Multiple Times
 -------------------------
 PyReason allows you to reason over the graph multiple times. This can be useful when you want to reason over the graph iteratively
 and add facts that were not available before. To reason over the graph multiple times, you can set ``again=True`` in ``pr.reason(again=True)``.
-To specify additional facts, use the ``node_facts`` or ``edge_facts`` parameters in ``pr.reason(...)``. These parameters allow you to add additional facts to the graph before reasoning again.
+To specify additional facts, use the ``facts`` parameter in ``pr.reason(...)``. This parameter allows you to add additional
+facts to the graph before reasoning again. The facts are specified as a list of PyReason facts.
+
+.. note::
+    When reasoning multiple times, the time continues to increment. Therefore, any facts that are added should take this into account.
+    The ``timesteps`` parameter specifies how many additional timesteps to reason. For example, if the initial reasoning converges at
+    timestep 5, and you want to reason for 3 more timesteps, you can set ``timesteps=3`` in ``pr.reason(timesteps=3, again=True, facts=[some_new_fact])``.
diff --git a/pyreason/.cache_status.yaml b/pyreason/.cache_status.yaml
@@ -1 +1 @@
-initialized: true
+initialized: false
diff --git a/pyreason/pyreason.py b/pyreason/pyreason.py
@@ -6,7 +6,7 @@
 import pandas as pd
 import memory_profiler as mp
 import warnings
-from typing import List, Type, Callable, Tuple
+from typing import List, Type, Callable, Tuple, Optional
 
 from pyreason.scripts.utils.output import Output
 from pyreason.scripts.utils.filter import Filter
@@ -423,24 +423,24 @@ def allow_ground_rules(self, value: bool) -> None:
 
 
 # VARIABLES
-__graph = None
-__rules = None
-__clause_maps = None
-__node_facts = None
-__edge_facts = None
-__ipl = None
-__specific_node_labels = None
-__specific_edge_labels = None
-
-__non_fluent_graph_facts_node = None
-__non_fluent_graph_facts_edge = None
-__specific_graph_node_labels = None
-__specific_graph_edge_labels = None
+__graph: Optional[nx.DiGraph] = None
+__rules: Optional[numba.typed.List] = None
+__clause_maps: Optional[dict] = None
+__node_facts: Optional[numba.typed.List] = None
+__edge_facts: Optional[numba.typed.List] = None
+__ipl: Optional[numba.typed.List] = None
+__specific_node_labels: Optional[numba.typed.List] = None
+__specific_edge_labels: Optional[numba.typed.List] = None
+
+__non_fluent_graph_facts_node: Optional[numba.typed.List] = None
+__non_fluent_graph_facts_edge: Optional[numba.typed.List] = None
+__specific_graph_node_labels: Optional[numba.typed.List] = None
+__specific_graph_edge_labels: Optional[numba.typed.List] = None
 
 __annotation_functions = []
 
 __timestamp = ''
-__program = None
+__program: Optional[Program] = None
 
 __graphml_parser = GraphmlParser()
 settings = _Settings()
@@ -624,16 +624,15 @@ def add_annotation_function(function: Callable) -> None:
     __annotation_functions.append(function)
 
 
-def reason(timesteps: int = -1, convergence_threshold: int = -1, convergence_bound_threshold: float = -1, queries: List[Query] = None, again: bool = False, node_facts: List[Type[fact_node.Fact]] = None, edge_facts: List[Type[fact_edge.Fact]] = None):
+def reason(timesteps: int = -1, convergence_threshold: int = -1, convergence_bound_threshold: float = -1, queries: List[Query] = None, again: bool = False, facts: List[Fact] = None):
     """Function to start the main reasoning process. Graph and rules must already be loaded.
 
     :param timesteps: Max number of timesteps to run. -1 specifies run till convergence. If reasoning again, this is the number of timesteps to reason for extra (no zero timestep), defaults to -1
     :param convergence_threshold: Maximum number of interpretations that have changed between timesteps or fixed point operations until considered convergent. Program will end at convergence. -1 => no changes, perfect convergence, defaults to -1
     :param convergence_bound_threshold: Maximum change in any interpretation (bounds) between timesteps or fixed point operations until considered convergent, defaults to -1
     :param queries: A list of PyReason query objects that can be used to filter the ruleset based on the query. Default is None
     :param again: Whether to reason again on an existing interpretation, defaults to False
-    :param node_facts: New node facts to use during the next reasoning process. Other facts from file will be discarded, defaults to None
-    :param edge_facts: New edge facts to use during the next reasoning process. Other facts from file will be discarded, defaults to None
+    :param facts: New facts to use during the next reasoning process when reasoning again. Other facts from file will be discarded, defaults to None
     :return: The final interpretation after reasoning.
     """
     global settings, __timestamp
@@ -654,10 +653,10 @@ def reason(timesteps: int = -1, convergence_threshold: int = -1, convergence_bou
     else:
         if settings.memory_profile:
             start_mem = mp.memory_usage(max_usage=True)
-            mem_usage, interp = mp.memory_usage((_reason_again, [timesteps, convergence_threshold, convergence_bound_threshold, node_facts, edge_facts]), max_usage=True, retval=True)
+            mem_usage, interp = mp.memory_usage((_reason_again, [timesteps, convergence_threshold, convergence_bound_threshold, facts]), max_usage=True, retval=True)
             print(f"\nProgram used {mem_usage-start_mem} MB of memory")
         else:
-            interp = _reason_again(timesteps, convergence_threshold, convergence_bound_threshold, node_facts, edge_facts)
+            interp = _reason_again(timesteps, convergence_threshold, convergence_bound_threshold, facts)
         
     return interp
 
@@ -746,20 +745,31 @@ def _reason(timesteps, convergence_threshold, convergence_bound_threshold, queri
     return interpretation
 
 
-def _reason_again(timesteps, convergence_threshold, convergence_bound_threshold, node_facts, edge_facts):
+def _reason_again(timesteps, convergence_threshold, convergence_bound_threshold, facts):
     # Globals
     global __graph, __rules, __node_facts, __edge_facts, __ipl, __specific_node_labels, __specific_edge_labels, __graphml_parser
     global settings, __timestamp, __program
 
     assert __program is not None, 'To run `reason_again` you need to have reasoned once before'
 
-    # Extend current set of facts with the new facts supplied
-    all_edge_facts = numba.typed.List.empty_list(fact_edge.fact_type)
+    # Parse new facts and Extend current set of facts with the new facts supplied
     all_node_facts = numba.typed.List.empty_list(fact_node.fact_type)
-    if node_facts is not None:
-        all_node_facts.extend(numba.typed.List(node_facts))
-    if edge_facts is not None:
-        all_edge_facts.extend(numba.typed.List(edge_facts))
+    all_edge_facts = numba.typed.List.empty_list(fact_edge.fact_type)
+    fact_cnt = 1
+    for fact in facts:
+        if fact.type == 'node':
+            print(fact.name)
+            if fact.name is None:
+                fact.name = f'fact_{len(__node_facts)+len(__edge_facts)+fact_cnt}'
+            f = fact_node.Fact(fact.name, fact.component, fact.pred, fact.bound, fact.start_time, fact.end_time, fact.static)
+            all_node_facts.append(f)
+            fact_cnt += 1
+        else:
+            if fact.name is None:
+                fact.name = f'fact_{len(__node_facts)+len(__edge_facts)+fact_cnt}'
+            f = fact_edge.Fact(fact.name, fact.component, fact.pred, fact.bound, fact.start_time, fact.end_time, fact.static)
+            all_edge_facts.append(f)
+            fact_cnt += 1
 
     # Run Program and get final interpretation
     interpretation = __program.reason_again(timesteps, convergence_threshold, convergence_bound_threshold, all_node_facts, all_edge_facts, settings.verbose)
diff --git a/pyreason/scripts/interpretation/interpretation.py b/pyreason/scripts/interpretation/interpretation.py
@@ -209,6 +209,8 @@ def _init_facts(facts_node, facts_edge, facts_to_be_applied_node, facts_to_be_ap
 		return max_time
 
 	def _start_fp(self, rules, max_facts_time, verbose, again):
+		if again:
+			self.num_ga.append(self.num_ga[-1])
 		fp_cnt, t = self.reason(self.interpretations_node, self.interpretations_edge, self.predicate_map_node, self.predicate_map_edge, self.tmax, self.prev_reasoning_data, rules, self.nodes, self.edges, self.neighbors, self.reverse_neighbors, self.rules_to_be_applied_node, self.rules_to_be_applied_edge, self.edges_to_be_added_node_rule, self.edges_to_be_added_edge_rule, self.rules_to_be_applied_node_trace, self.rules_to_be_applied_edge_trace, self.facts_to_be_applied_node, self.facts_to_be_applied_edge, self.facts_to_be_applied_node_trace, self.facts_to_be_applied_edge_trace, self.ipl, self.rule_trace_node, self.rule_trace_edge, self.rule_trace_node_atoms, self.rule_trace_edge_atoms, self.reverse_graph, self.atom_trace, self.save_graph_attributes_to_rule_trace, self.persistent, self.inconsistency_check, self.store_interpretation_changes, self.update_mode, self.allow_ground_rules, max_facts_time, self.annotation_functions, self._convergence_mode, self._convergence_delta, self.num_ga, verbose, again)
 		self.time = t - 1
 		# If we need to reason again, store the next timestep to start from
diff --git a/pyreason/scripts/interpretation/interpretation_parallel.py b/pyreason/scripts/interpretation/interpretation_parallel.py
@@ -209,6 +209,8 @@ def _init_facts(facts_node, facts_edge, facts_to_be_applied_node, facts_to_be_ap
 		return max_time
 
 	def _start_fp(self, rules, max_facts_time, verbose, again):
+		if again:
+			self.num_ga.append(self.num_ga[-1])
 		fp_cnt, t = self.reason(self.interpretations_node, self.interpretations_edge, self.predicate_map_node, self.predicate_map_edge, self.tmax, self.prev_reasoning_data, rules, self.nodes, self.edges, self.neighbors, self.reverse_neighbors, self.rules_to_be_applied_node, self.rules_to_be_applied_edge, self.edges_to_be_added_node_rule, self.edges_to_be_added_edge_rule, self.rules_to_be_applied_node_trace, self.rules_to_be_applied_edge_trace, self.facts_to_be_applied_node, self.facts_to_be_applied_edge, self.facts_to_be_applied_node_trace, self.facts_to_be_applied_edge_trace, self.ipl, self.rule_trace_node, self.rule_trace_edge, self.rule_trace_node_atoms, self.rule_trace_edge_atoms, self.reverse_graph, self.atom_trace, self.save_graph_attributes_to_rule_trace, self.persistent, self.inconsistency_check, self.store_interpretation_changes, self.update_mode, self.allow_ground_rules, max_facts_time, self.annotation_functions, self._convergence_mode, self._convergence_delta, self.num_ga, verbose, again)
 		self.time = t - 1
 		# If we need to reason again, store the next timestep to start from
@@ -218,7 +220,7 @@ def _start_fp(self, rules, max_facts_time, verbose, again):
 			print('Fixed Point iterations:', fp_cnt)
 
 	@staticmethod
-	@numba.njit(cache=True, parallel=False)
+	@numba.njit(cache=True, parallel=True)
 	def reason(interpretations_node, interpretations_edge, predicate_map_node, predicate_map_edge, tmax, prev_reasoning_data, rules, nodes, edges, neighbors, reverse_neighbors, rules_to_be_applied_node, rules_to_be_applied_edge, edges_to_be_added_node_rule, edges_to_be_added_edge_rule, rules_to_be_applied_node_trace, rules_to_be_applied_edge_trace, facts_to_be_applied_node, facts_to_be_applied_edge, facts_to_be_applied_node_trace, facts_to_be_applied_edge_trace, ipl, rule_trace_node, rule_trace_edge, rule_trace_node_atoms, rule_trace_edge_atoms, reverse_graph, atom_trace, save_graph_attributes_to_rule_trace, persistent, inconsistency_check, store_interpretation_changes, update_mode, allow_ground_rules, max_facts_time, annotation_functions, convergence_mode, convergence_delta, num_ga, verbose, again):
 		t = prev_reasoning_data[0]
 		fp_cnt = prev_reasoning_data[1]
diff --git a/tests/test_hello_world.py b/tests/test_hello_world.py
@@ -48,4 +48,3 @@ def test_hello_world():
     # John should be popular in timestep 3
     assert 'John' in dataframes[2]['component'].values and dataframes[2].iloc[1].popular == [1, 1], 'John should have popular bounds [1,1] for t=2 timesteps'
 
-test_hello_world()
diff --git a/tests/test_reason_again.py b/tests/test_reason_again.py
@@ -0,0 +1,55 @@
+# Test if reasoning again with newly supplied facts works
+import pyreason as pr
+import faulthandler
+
+
+def test_reason_again():
+    # Reset PyReason
+    pr.reset()
+    pr.reset_rules()
+    pr.reset_settings()
+
+    # Modify the paths based on where you've stored the files we made above
+    graph_path = './tests/friends_graph.graphml'
+
+    # Modify pyreason settings to make verbose
+    pr.settings.verbose = True     # Print info to screen
+    pr.settings.atom_trace = True  # Save atom trace
+    # pr.settings.optimize_rules = False  # Disable rule optimization for debugging
+
+    # Load all the files into pyreason
+    pr.load_graphml(graph_path)
+    pr.add_rule(pr.Rule('popular(x) <-1 popular(y), Friends(x,y), owns(y,z), owns(x,z)', 'popular_rule'))
+    pr.add_fact(pr.Fact('popular(Mary)', 'popular_fact', 0, 1))
+
+    # Run the program for one timestep first; reasoning continues in the second call below
+    faulthandler.enable()
+    interpretation = pr.reason(timesteps=1)
+
+    # Now reason again
+    new_fact = pr.Fact('popular(Mary)', 'popular_fact2', 2, 4)
+    interpretation = pr.reason(timesteps=3, again=True, facts=[new_fact])
+    pr.save_rule_trace(interpretation)
+
+    # Display the changes in the interpretation for each timestep
+    dataframes = pr.filter_and_sort_nodes(interpretation, ['popular'])
+    for t, df in enumerate(dataframes):
+        print(f'TIMESTEP - {t}')
+        print(df)
+        print()
+
+    assert len(dataframes[2]) == 1, 'At t=0 there should be one popular person'
+    assert len(dataframes[3]) == 2, 'At t=1 there should be two popular people'
+    assert len(dataframes[4]) == 3, 'At t=2 there should be three popular people'
+
+    # Mary should be popular in all three timesteps
+    assert 'Mary' in dataframes[2]['component'].values and dataframes[2].iloc[0].popular == [1, 1], 'Mary should have popular bounds [1,1] for t=0 timesteps'
+    assert 'Mary' in dataframes[3]['component'].values and dataframes[3].iloc[0].popular == [1, 1], 'Mary should have popular bounds [1,1] for t=1 timesteps'
+    assert 'Mary' in dataframes[4]['component'].values and dataframes[4].iloc[0].popular == [1, 1], 'Mary should have popular bounds [1,1] for t=2 timesteps'
+
+    # Justin should be popular in timesteps 1, 2
+    assert 'Justin' in dataframes[3]['component'].values and dataframes[3].iloc[1].popular == [1, 1], 'Justin should have popular bounds [1,1] for t=1 timesteps'
+    assert 'Justin' in dataframes[4]['component'].values and dataframes[4].iloc[2].popular == [1, 1], 'Justin should have popular bounds [1,1] for t=2 timesteps'
+
+    # John should be popular in timestep 3
+    assert 'John' in dataframes[4]['component'].values and dataframes[4].iloc[1].popular == [1, 1], 'John should have popular bounds [1,1] for t=2 timesteps'

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-initialized: true`
	`1`	`+initialized: false`
Original file line number	Diff line number	Diff line change
`@@ -48,4 +48,3 @@ def test_hello_world():`
`48`	`48`	`# John should be popular in timestep 3`
`49`	`49`	`assert 'John' in dataframes[2]['component'].values and dataframes[2].iloc[1].popular == [1, 1], 'John should have popular bounds [1,1] for t=2 timesteps'`
`50`	`50`
`51`		`-test_hello_world()`