This commit is contained in:
		@@ -35,3 +35,12 @@ class Config:
 | 
			
		||||
 | 
			
		||||
    REDIS_URL = os.environ["REDIS_URL"]
 | 
			
		||||
    COLLECTOR_URL = os.environ["COLLECTOR_URL"]  # http://example.com/report/
 | 
			
		||||
 | 
			
		||||
    # how many times an increasing queue must be observed for an action to be taken
 | 
			
		||||
    RESCHEDULE_TIRGGER_LEVEL = int(os.environ.get("RESCHEDULE_TIRGGER_LEVEL", 5))
 | 
			
		||||
 | 
			
		||||
    # The counter clears itself after some time
 | 
			
		||||
    RESCHEDULE_TRIGGER_COUNTER_TTL = int(os.environ.get("RESCHEDULE_TRIGGER_COUNTER_TTL", 60))
 | 
			
		||||
 | 
			
		||||
    # how long a mark should live on a site which had trouble recently
 | 
			
		||||
    RECENT_TROUBLE_TTL = int(os.environ.get("RECENT_TROUBLE_TTL", 120))
 | 
			
		||||
 
 | 
			
		||||
@@ -8,10 +8,6 @@ from k8s_buzerator import ensure_running_pod_on_site
 | 
			
		||||
 | 
			
		||||
from urllib.parse import urljoin
 | 
			
		||||
 | 
			
		||||
RESCHEDULE_TIRGGER_LEVEL = 5  # how many times an incraising queue must be observed for an action to be taken
 | 
			
		||||
RESCHEDULE_TRIGGER_COUNTER_TTL = 60  # The counter clears itself after some time
 | 
			
		||||
RECENT_TROUBLE_TTL = 120  # how long a mark should live on a site which had trouble recently
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def run(redis_client: Redis, site_url_map: Dict[str, str]):
 | 
			
		||||
    run_count = redis_client.incr("RUNCOUNT")
 | 
			
		||||
@@ -31,17 +27,16 @@ def run(redis_client: Redis, site_url_map: Dict[str, str]):
 | 
			
		||||
            key = f"INCRAISINGQUEUE:{site_name}"
 | 
			
		||||
            incraising_queue_detected_times = redis_client.incr(key)
 | 
			
		||||
 | 
			
		||||
            if incraising_queue_detected_times > RESCHEDULE_TIRGGER_LEVEL:
 | 
			
		||||
            if incraising_queue_detected_times > Config.RESCHEDULE_TIRGGER_LEVEL:
 | 
			
		||||
                logging.debug(f"Tirgger level reached at {site_name}")
 | 
			
		||||
                redis_client.delete(key)
 | 
			
		||||
                incraising_queue_at.append(site_name)
 | 
			
		||||
            else:
 | 
			
		||||
                logging.debug(
 | 
			
		||||
                    f"Suspicious queue size change at {site_name} ({incraising_queue_detected_times}/{RESCHEDULE_TIRGGER_LEVEL})")
 | 
			
		||||
                redis_client.expire(key, RESCHEDULE_TRIGGER_COUNTER_TTL)  # Probably extend lifetime
 | 
			
		||||
                logging.debug(f"Suspicious queue size change at {site_name} ({incraising_queue_detected_times}/{Config.RESCHEDULE_TIRGGER_LEVEL})")
 | 
			
		||||
                redis_client.expire(key, Config.RESCHEDULE_TRIGGER_COUNTER_TTL)  # Probably extend lifetime
 | 
			
		||||
 | 
			
		||||
    # decide on default for the first time
 | 
			
		||||
    if run_count > RESCHEDULE_TIRGGER_LEVEL * 2:
 | 
			
		||||
    if run_count > Config.RESCHEDULE_TIRGGER_LEVEL * 2:
 | 
			
		||||
        default_site = redis_client.get("DEFAULT:SCHEDULED")
 | 
			
		||||
        if not default_site:
 | 
			
		||||
            logging.debug("Default site is not set. Selecting one...")
 | 
			
		||||
@@ -89,7 +84,7 @@ def run(redis_client: Redis, site_url_map: Dict[str, str]):
 | 
			
		||||
 | 
			
		||||
    # If attention is required, schedule a single workload one tier lower
 | 
			
		||||
    for site_seeking_attention in incraising_queue_at:
 | 
			
		||||
        redis_client.set(f"RECENTTROUBLE:{site_seeking_attention}", b"\x01", ex=RECENT_TROUBLE_TTL)
 | 
			
		||||
        redis_client.set(f"RECENTTROUBLE:{site_seeking_attention}", b"\x01", ex=Config.RECENT_TROUBLE_TTL)
 | 
			
		||||
        if current_scheduling_table_counters[site_seeking_attention] == 0:
 | 
			
		||||
            logging.warning("Wtf? Site reporting trouble, but there are no workload scheduled to it... nothing to do")
 | 
			
		||||
            continue
 | 
			
		||||
 
 | 
			
		||||
@@ -36,6 +36,7 @@ data:
 | 
			
		||||
  DEBUG: "yes"
 | 
			
		||||
  REDIS_URL: "redis://birb-scheduler-redis:6379/0"
 | 
			
		||||
  COLLECTOR_URL: "http://birb-latency-collector/report/"
 | 
			
		||||
  RECENT_TROUBLE_TTL: "240"
 | 
			
		||||
---
 | 
			
		||||
kind: ClusterRole
 | 
			
		||||
apiVersion: rbac.authorization.k8s.io/v1
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user