#!/usr/bin/python3
"""Graph-based detection of Indicators of Compromise (IoCs) in connection logs.

Pipeline:

1. ``parse_log_file`` reads a tab-separated conn log (the column names appear
   to follow the Zeek ``conn.log`` layout) and keeps alerts that look
   suspicious or whose destination is not on a frequency-based whitelist.
2. ``alert_similarity_graph`` links alerts that are close in time and share
   attributes; ``cluster_cliques`` extracts k-clique communities from it.
3. ``infer_label`` classifies each community by its attacker/victim topology
   (one-to-one, one-to-many, many-to-one, many-to-many).
4. ``send_iocs_to_server`` posts the findings to a central server, after which
   a small Flask app listens for alerts pushed back by that server.

Every alert is a 1-D object array laid out as::

    [uid, src_ip, src_port, dst_ip, dst_port, ts, proto, conn_state, network_id]

Provenance: extracted from the patch "Upload New File" (commit e5d806d6),
which adds this module as ``gbac.py``.
"""

import argparse
import json
import logging
import os
import random
import ssl
from collections import Counter, defaultdict
from ipaddress import ip_address, ip_network
from ipaddress import ip_address as validate_ip
from typing import Dict, List, Optional, Tuple

import networkx as nx
import numpy as np
import pandas as pd
import requests
from flask import Flask, jsonify, request

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Positions of the fields inside one alert row (see the module docstring).
_UID, _SRC_IP, _SRC_PORT, _DST_IP, _DST_PORT, _TS, _PROTO, _STATE = range(8)

# Column names assigned to the parsed conn log, in file order.
_CONN_COLUMNS = [
    'ts', 'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h', 'id.resp_p', 'proto', 'service',
    'duration', 'orig_bytes', 'resp_bytes', 'conn_state', 'local_orig', 'local_resp',
    'missed_bytes', 'history', 'orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes',
    'tunnel_parents', 'ip_proto',
]

# Columns kept for each alert; their order defines the index constants above.
_CONN_REQUIRED_FIELDS = [
    'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h', 'id.resp_p', 'ts', 'proto', 'conn_state',
]

DEFAULT_SUBNET = "147.32.84.0/24"

_PORTSCAN_PATTERN = "one-to-many (Reconnaissance - Portscan)"

# Keys that ``receive_alert`` reads from the posted JSON object.
_ALERT_FIELDS = ("source_network", "current_network", "type", "attackers", "victims",
                 "recommendation")


def generate_dynamic_whitelist(alerts: np.ndarray, min_connections: int = 2) -> List[Tuple[str, str, str]]:
    """Build a whitelist of frequently contacted destinations.

    Args:
        alerts: Iterable of alert rows.
        min_connections: Minimum number of alerts a (destination IP,
            destination port, protocol) tuple needs to be whitelisted.

    Returns:
        The whitelisted ``(dst_ip, dst_port, proto)`` tuples, with the port
        rendered as a string, in order of first appearance.
    """
    pair_counts = Counter(
        (alert[_DST_IP], str(alert[_DST_PORT]), alert[_PROTO]) for alert in alerts
    )
    whitelist = [key for key, count in pair_counts.items() if count >= min_connections]
    logging.info(f"Generated dynamic whitelist with {len(whitelist)} entries: {whitelist[:10]}...")
    return whitelist


def is_whitelisted(alert: np.ndarray, whitelist: List[Tuple[str, str, str]]) -> bool:
    """Return True if the alert's (dst IP, dst port, protocol) is in *whitelist*.

    *whitelist* may be any container supporting ``in``; passing a set makes
    the lookup constant-time.
    """
    return (alert[_DST_IP], str(alert[_DST_PORT]), alert[_PROTO]) in whitelist


def validate_alert(alert):
    """Check that an alert has parseable IPs, in-range ports and a numeric timestamp.

    Returns:
        True when the alert is valid; False (after logging a warning) otherwise.
    """
    try:
        validate_ip(alert[_SRC_IP])
        validate_ip(alert[_DST_IP])
        src_port, dst_port = int(alert[_SRC_PORT]), int(alert[_DST_PORT])
        if not (0 <= src_port <= 65535 and 0 <= dst_port <= 65535):
            raise ValueError("Port number out of valid range")
        float(alert[_TS])
        return True
    except (ValueError, TypeError) as e:
        logging.warning(f"Invalid alert skipped: {alert}, error: {e}")
        return False


def parse_log_file(log_file: str, log_type: str = 'conn', time_window: float = 60.0, network_id: str = None) -> np.ndarray:
    """Parse a conn log into alert rows and keep the suspicious ones.

    An alert is kept when its source shows suspicious behaviour (many
    rejected/unanswered or completed connections inside *time_window*, or
    heavy ICMP traffic) or when its destination is not whitelisted.

    Args:
        log_file: Path to a tab-separated ``.log`` file.
        log_type: Log flavour; only ``'conn'`` is supported.
        time_window: Maximum spread in seconds of a source's timestamps for
            its connections to count as a burst.
        network_id: Identifier appended as the last field of every alert.

    Returns:
        A 2-D object array with one row per kept alert. When nothing is kept
        the array has shape ``(0, 9)`` so it can still be concatenated with
        the results of other files.

    Raises:
        ValueError: If *log_file* is not a ``.log`` file, *log_type* is not
            ``'conn'``, or the file does not have the expected columns.
    """
    logging.info(f"Parsing {log_type} file: {log_file} for network {network_id}")
    if not log_file.endswith('.log'):
        raise ValueError(f"Unsupported log file (expected a '.log' file): {log_file}")
    if log_type != 'conn':
        raise ValueError(f"Unsupported log type: {log_type}")

    df = pd.read_csv(log_file, sep='\t', comment='#', header=None)
    df.columns = _CONN_COLUMNS
    df.replace('-', np.nan, inplace=True)  # '-' marks an unset field
    df = df.infer_objects(copy=False)
    df = df.dropna(subset=['id.orig_h', 'ts'])

    alerts = df[_CONN_REQUIRED_FIELDS].to_numpy()
    valid_alerts = [alert for alert in alerts if validate_alert(alert)]
    if len(valid_alerts) < len(alerts):
        logging.warning(f"Dropped {len(alerts) - len(valid_alerts)} invalid alerts")

    # A set keeps the per-alert whitelist lookup below constant-time.
    whitelist = set(generate_dynamic_whitelist(valid_alerts))

    src_timestamps = defaultdict(list)
    for alert in valid_alerts:
        src_timestamps[alert[_SRC_IP]].append(float(alert[_TS]))
    # Computed once per source rather than once per alert.
    src_counts = {src: len(stamps) for src, stamps in src_timestamps.items()}
    src_span = {src: max(stamps) - min(stamps) for src, stamps in src_timestamps.items()}

    filtered_alerts = []
    total_alerts = len(valid_alerts)
    for i, alert in enumerate(valid_alerts):
        if i % 50000 == 0:
            logging.info(f"Filtering progress: {i}/{total_alerts} alerts processed")
        src = alert[_SRC_IP]
        count = src_counts[src]
        in_window = src_span[src] <= time_window
        state = str(alert[_STATE])
        is_suspicious = (
            (state in ('REJ', 'S0') and count > 20 and in_window)
            or (str(alert[_PROTO]) == 'icmp' and count > 50)
            or (state == 'SF' and count > 10 and in_window)
        )
        if is_suspicious or not is_whitelisted(alert, whitelist):
            filtered_alerts.append(np.append(alert, network_id))  # network_id becomes the last field

    logging.info(f"Parsed {total_alerts} alerts, {len(filtered_alerts)} remain after filtering")
    if not filtered_alerts:
        return np.empty((0, len(_CONN_REQUIRED_FIELDS) + 1), dtype=object)
    return np.array(filtered_alerts)


def alert_similarity_graph(alerts: np.ndarray, similarity_threshold: float = 0.25, time_window: float = 60.0) -> nx.Graph:
    """Build an undirected graph linking similar alerts that are close in time.

    Two alerts at most *time_window* seconds apart score 0.25 for each equal
    attribute among source IP, source port, destination IP and destination
    port, plus 0.25 for an equal, non-missing connection state. An edge
    weighted with that score is added when it reaches *similarity_threshold*.

    Returns:
        Graph whose nodes are alert UIDs.
    """
    graph = nx.Graph()
    ordered = sorted(alerts, key=lambda alert: float(alert[_TS]))
    stamps = [float(alert[_TS]) for alert in ordered]
    endpoint_fields = (_SRC_IP, _SRC_PORT, _DST_IP, _DST_PORT)

    for i, first in enumerate(ordered):
        uid_1 = first[_UID]
        graph.add_node(uid_1)
        for j in range(i + 1, len(ordered)):
            # Alerts are time-sorted, so the first one outside the window ends the scan.
            if not (stamps[j] - stamps[i] <= time_window):
                break
            second = ordered[j]
            uid_2 = second[_UID]
            if uid_1 == uid_2:
                continue
            graph.add_node(uid_2)
            sim = sum(0.25 for field in endpoint_fields if first[field] == second[field])
            if (len(first) > _STATE and len(second) > _STATE
                    and not pd.isna(first[_STATE]) and not pd.isna(second[_STATE])
                    and first[_STATE] == second[_STATE]):
                sim += 0.25
            if sim >= similarity_threshold:
                graph.add_edge(uid_1, uid_2, weight=sim)

    logging.info(f"Created similarity graph with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges")
    return graph


def netflow_graph(alerts: np.ndarray) -> nx.DiGraph:
    """Build a directed graph with an edge from each source IP to its destination IP.

    Alerts whose source and destination are identical are ignored.
    """
    graph = nx.DiGraph()
    for alert in alerts:
        src, dst = alert[_SRC_IP], alert[_DST_IP]
        if src != dst:
            graph.add_edge(src, dst)  # add_edge also creates missing nodes
    logging.info(f"Created netflow graph with {graph.number_of_nodes()} nodes and {graph.number_of_edges()} edges")
    return graph


def cluster_cliques(G: nx.Graph, k: int = 3) -> List[frozenset]:
    """Return the k-clique communities of the similarity graph *G*."""
    communities = list(nx.algorithms.community.k_clique_communities(G, k))
    logging.info(f"Found {len(communities)} k-clique communities with k={k}")
    return communities


def get_alerts_by_uid(alerts: np.ndarray, uid_list: List[str]) -> np.ndarray:
    """Return the alerts whose UID is contained in *uid_list*, in input order."""
    wanted = set(uid_list)
    return np.array([alert for alert in alerts if alert[_UID] in wanted])


def is_in_same_subnet(ip1: str, ip2: str, subnet: str = DEFAULT_SUBNET) -> bool:
    """Return True if both IP addresses lie inside *subnet*."""
    net = ip_network(subnet, strict=False)
    return ip_address(ip1) in net and ip_address(ip2) in net


def _within_window(stamps: List[float], time_window: float) -> bool:
    """Return True if the spread of *stamps* does not exceed *time_window*."""
    return max(stamps) - min(stamps) <= time_window


def infer_label(directed_graph: nx.DiGraph, alerts: np.ndarray, time_window: float = 60.0,
                exploit_threshold: int = 10, scan_threshold: int = 20, subnet: str = DEFAULT_SUBNET,
                false_positive_rate: float = 0.1) -> List[Tuple[float, str, List[str], List[str]]]:
    """Classify a cluster of alerts by the shape of its communication graph.

    Args:
        directed_graph: Source-to-destination flow graph of the cluster.
        alerts: Alert rows of the cluster.
        time_window: Maximum spread in seconds of the alerts of one scan or
            exploit.
        exploit_threshold: Minimum number of alerts (and of completed ``SF``
            connections) between one pair for an exploitation finding.
        scan_threshold: Minimum number of alerts from one source for a
            portscan finding.
        subnet: Accepted for interface compatibility; not used here.
        false_positive_rate: Probability with which a non-portscan finding is
            downgraded to certainty 0.0 and tagged as a possible false
            positive. Pass 0 for deterministic results.

    Returns:
        A list of ``(certainty, pattern, attackers, victims)`` tuples. Holds a
        single certainty-0.0 entry when there are fewer than 10 alerts or no
        pattern matches.
    """
    attackers = [node for node in directed_graph.nodes if directed_graph.out_degree(node) >= 1]
    victims = [node for node in directed_graph.nodes if directed_graph.in_degree(node) >= 1]

    if len(alerts) < 10:
        return [(0.0, "no attack (insufficient alerts)", attackers, victims)]

    # Topology scores; each guard avoids a division by zero on tiny graphs.
    V, A, T = len(directed_graph.nodes), len(attackers), len(victims)
    oto = 1/3 * ((V-A)/(V-1) + (V-T)/(V-1) + (V-abs(A-T))/V) if V > 1 else 0
    otm = 1/3 * ((V-A)/(V-1) + T/(V-1) + abs(A-T)/(V-2)) if V > 2 else 0
    mto = 1/3 * (A/(V-1) + (V-T)/(V-1) + abs(A-T)/(V-2)) if V > 2 else 0
    mtm = 1/3 * (A/V + T/V + (V-abs(A-T))/V) if V > 0 else 0

    # Group (timestamp, state, destination) once instead of rescanning per key.
    by_source = defaultdict(list)
    by_pair = defaultdict(list)
    for alert in alerts:
        state = alert[_STATE] if len(alert) > _STATE else None
        record = (float(alert[_TS]), state, alert[_DST_IP])
        by_source[alert[_SRC_IP]].append(record)
        by_pair[(alert[_SRC_IP], alert[_DST_IP])].append(record)

    iocs = []

    # Portscan: one source, many alerts in a short time, at least one unanswered (S0).
    for src, records in by_source.items():
        if len(records) < scan_threshold:
            continue
        if (_within_window([ts for ts, _, _ in records], time_window)
                and any(state == 'S0' for _, state, _ in records)):
            scanned = list(dict.fromkeys(dst for _, _, dst in records))
            iocs.append((otm, _PORTSCAN_PATTERN, [src], scanned))

    # Exploitation: many completed (SF) connections between one pair in a short time.
    for (src, dst), records in by_pair.items():
        if len(records) < exploit_threshold:
            continue
        if (_within_window([ts for ts, _, _ in records], time_window)
                and sum(1 for _, state, _ in records if state == 'SF') >= exploit_threshold):
            iocs.append((oto, "one-to-one (Exploitation)", [src], [dst]))

    # DDoS: at least five attackers converging on a single victim.
    if A >= 5 and T == 1:
        iocs.append((mto, "many-to-one (Coordinated Attack - DDoS)", attackers, victims))

    # Worm: many attackers and victims, with at least one victim attacking in turn.
    if A >= 5 and T >= 5:
        attacker_set = set(attackers)
        if any(victim in attacker_set for victim in victims):
            iocs.append((mtm, "many-to-many (Worm Propagation)", attackers, victims))

    if not iocs:
        return [(0.0, "no attack", attackers, victims)]

    # NOTE(review): findings are downgraded at random, not on evidence; this
    # looks like a placeholder for a real false-positive check.
    filtered_iocs = []
    for certainty, pattern, atkrs, victs in iocs:
        if random.random() < false_positive_rate and pattern != _PORTSCAN_PATTERN:
            filtered_iocs.append((0.0, f"{pattern} (possible false positive)", atkrs, victs))
        else:
            filtered_iocs.append((certainty, pattern, atkrs, victs))

    return filtered_iocs


def detect_iocs(alerts: np.ndarray, similarity_threshold: float = 0.25, clique_size: int = 3,
                time_window: float = 60.0, exploit_threshold: int = 10, scan_threshold: int = 20,
                subnet: str = DEFAULT_SUBNET) -> List[Tuple[float, str, np.ndarray, List[str], List[str]]]:
    """Cluster alerts and label every cluster with the attack patterns it shows.

    Returns:
        ``(certainty, pattern, alerts_in_clique, attackers, victims)`` tuples
        for every finding with a certainty above zero.
    """
    sim_graph = alert_similarity_graph(alerts, similarity_threshold, time_window)
    cliques = cluster_cliques(sim_graph, clique_size)

    iocs = []
    for clique in cliques:
        alerts_in_clique = get_alerts_by_uid(alerts, list(clique))
        flow_graph = netflow_graph(alerts_in_clique)
        detected_iocs = infer_label(flow_graph, alerts_in_clique, time_window, exploit_threshold,
                                    scan_threshold, subnet)
        for certainty, pattern_name, attackers, victims in detected_iocs:
            if certainty > 0:
                iocs.append((certainty, pattern_name, alerts_in_clique, attackers, victims))
                logging.info(f"Detected IoC: {pattern_name} with certainty {certainty:.2f}, {len(attackers)} attackers, {len(victims)} victims")

    return iocs


def _json_safe(value):
    """Return *value* in a JSON-encodable form (NumPy scalar -> Python, NaN -> None)."""
    if isinstance(value, np.generic):
        value = value.item()
    if isinstance(value, float) and value != value:  # only NaN is unequal to itself
        return None
    return value


def send_iocs_to_server(iocs, server_url, network_id, verify=False, timeout=10.0):
    """Send detected IoCs to the central server via HTTPS (best effort).

    Failures are logged and never raised.

    Args:
        iocs: Findings as returned by ``detect_iocs``.
        server_url: Base URL of the server; ``/submit_ioc`` is appended.
        network_id: Network identifier attached to every finding.
        verify: Passed to ``requests.post``. SECURITY: the default ``False``
            disables TLS certificate verification (kept for compatibility);
            pass ``True`` or a CA bundle path to verify the server.
        timeout: Seconds to wait for the server before giving up.
    """
    iocs_with_id = [
        {
            "certainty": certainty,
            "pattern": pattern,
            # NaN is not valid JSON, so missing fields are sent as null.
            "alerts": [[_json_safe(value) for value in row] for row in clique_alerts],
            "attackers": attackers,
            "victims": victims,
            "network_id": network_id,
            "timestamp": float(clique_alerts[0][_TS]) if len(clique_alerts) else None,
        }
        for certainty, pattern, clique_alerts, attackers, victims in iocs
    ]
    try:
        response = requests.post(f"{server_url}/submit_ioc", json=iocs_with_id, verify=verify,
                                 timeout=timeout)
    except (requests.RequestException, TypeError, ValueError) as e:
        logging.error(f"Connection error sending to server: {e}")
        return
    if response.status_code == 200:
        logging.info(f"IoCs successfully sent to server (Network {network_id})")
    else:
        logging.error(f"Error sending IoCs: {response.status_code}")


app = Flask(__name__)


@app.route('/alert', methods=['POST'])
def receive_alert():
    """Receive an alert pushed by the server and log its details.

    Returns:
        ``{"status": "received"}`` with HTTP 200, or an error object with
        HTTP 400 when the body is not a JSON object carrying all expected keys.
    """
    alert_plus = request.get_json(silent=True)
    if not isinstance(alert_plus, dict):
        return jsonify({"status": "error", "message": "expected a JSON object"}), 400
    missing = [key for key in _ALERT_FIELDS if key not in alert_plus]
    if missing:
        return jsonify({"status": "error", "message": f"missing fields: {missing}"}), 400

    if alert_plus["source_network"] == alert_plus["current_network"]:
        logging.warning(f"Warning: Multi-step attack detected in this network ({alert_plus['source_network']})!")
    else:
        logging.warning(f"Warning: Multi-step attack detected in {alert_plus['source_network']}!")
    logging.info(f"Attack type: {alert_plus['type']}")
    logging.info(f"Attackers: {alert_plus['attackers']}")
    logging.info(f"Victims: {alert_plus['victims']}")
    logging.info(f"Recommendation: {alert_plus['recommendation']}")
    return jsonify({"status": "received"}), 200


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the detector."""
    parser = argparse.ArgumentParser(description="Graph-based IoC Detection with Dynamic Whitelist")
    parser.add_argument('--conn_files', nargs='+', type=str, required=True, help="Paths to network conn log files (e.g., conn1.log conn2.log)")
    parser.add_argument('--network_ids', nargs='+', type=str, required=True, help="Unique identifiers for home networks (e.g., HomeNet1 HomeNet2)")
    parser.add_argument('--server_url', type=str, default="https://localhost:443", help="URL of the central server")
    parser.add_argument('--similarity_threshold', type=float, default=0.25, help="Similarity threshold for alert clustering")
    parser.add_argument('--clique_size', type=int, default=3, help="Minimum clique size for community detection")
    parser.add_argument('--time_window', type=float, default=60.0, help="Time window in seconds for detection")
    parser.add_argument('--exploit_threshold', type=int, default=10, help="Threshold for exploitation connection count")
    parser.add_argument('--scan_threshold', type=int, default=20, help="Threshold for scan target count")
    parser.add_argument('--subnet', type=str, default=DEFAULT_SUBNET, help="Subnet for LAN detection")
    return parser


def _print_iocs(iocs) -> None:
    """Print each finding together with up to five sample alerts."""
    for certainty, pattern, alerts_in_clique, attackers, victims in iocs:
        print(f"Pattern: {pattern}, Certainty: {certainty:.2f}")
        print(f"Attackers: {attackers}")
        print(f"Victims: {victims}")
        print(f"Number of alerts: {len(alerts_in_clique)}")
        print("Sample alerts:")
        for alert in alerts_in_clique[:5]:
            extra_field = alert[7] if len(alert) > 7 else None
            extra_label = "Query" if isinstance(extra_field, str) and extra_field not in ['REJ', 'S0', 'SF'] else "State"
            network_id = alert[-1]  # Network ID is the last field
            print(f"  UID: {alert[0]}, Src: {alert[1]}:{alert[2]}, Dst: {alert[3]}:{alert[4]}, TS: {alert[5]}, Proto: {alert[6]}, {extra_label}: {extra_field}, Network: {network_id}")
        print("---")


def main():
    """Parse the logs, detect and report IoCs, then serve the alert endpoint."""
    parser = _build_arg_parser()
    args = parser.parse_args()

    if len(args.conn_files) != len(args.network_ids):
        parser.error("Number of conn_files must match number of network_ids")

    try:
        alerts_list = [
            parse_log_file(conn_file, log_type='conn', time_window=args.time_window,
                           network_id=network_id)
            for conn_file, network_id in zip(args.conn_files, args.network_ids)
        ]
        alerts = np.concatenate(alerts_list)

        iocs = detect_iocs(alerts, args.similarity_threshold, args.clique_size, args.time_window,
                           args.exploit_threshold, args.scan_threshold, args.subnet)

        if not iocs:
            print("No Indicators of Compromise (IoCs) detected.")
        else:
            _print_iocs(iocs)
            # NOTE(review): every finding is submitted once per network id,
            # regardless of which network its alerts came from - confirm that
            # the server expects this.
            for network_id in args.network_ids:
                send_iocs_to_server(iocs, args.server_url, network_id)

        cert_path = "cert.pem"
        key_path = "key.pem"
        if not (os.path.exists(cert_path) and os.path.exists(key_path)):
            raise FileNotFoundError("SSL certificates (cert.pem, key.pem) not found. Generate them with OpenSSL.")
        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
        context.load_cert_chain(cert_path, key_path)
        app.run(host='0.0.0.0', port=8080, ssl_context=context, threaded=False)

    except Exception as e:
        # Top-level boundary: report the failure and keep the traceback in the log.
        print(f"An error occurred: {e}")
        logging.exception(f"Error in main: {e}")


if __name__ == "__main__":
    main()