@@ -15,17 +15,23 @@ import datetime
 import uuid
 from base64 import b64encode
 from operator import itemgetter
-from langchain_openai import ChatOpenAI
+from langchain_openai import ChatOpenAI, AzureChatOpenAI
+from langchain_aws import ChatBedrock, ChatBedrockConverse
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_ollama import ChatOllama
 from langchain_core.messages import HumanMessage, SystemMessage, trim_messages
 from langchain_core.chat_history import BaseChatMessageHistory, InMemoryChatMessageHistory
 from langchain_core.runnables.history import RunnableWithMessageHistory
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.runnables import RunnablePassthrough
 from asyncssh.misc import ConnectionLost
 import socket
 
 class JSONFormatter(logging.Formatter):
+    def __init__(self, sensor_name, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.sensor_name = sensor_name
+
     def format(self, record):
         log_record = {
             "timestamp": datetime.datetime.fromtimestamp(record.created, datetime.timezone.utc).isoformat(sep="T", timespec="milliseconds"),
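
Note: `trim_messages` (kept in the imports above) is what later enforces the `trimmer_max_tokens` budget set via `-t`/config. A minimal sketch of the usual LangChain wiring; the values and the chain position are illustrative, not taken from this diff:

    trimmer = trim_messages(
        max_tokens=64000,      # illustrative; the file reads trimmer_max_tokens from config
        strategy="last",       # keep the most recent turns of the session
        token_counter=llm,     # count tokens with the model chosen by choose_llm()
        include_system=True,   # never trim the honeypot system prompt
        start_on="human",
    )
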
@@ -35,7 +41,9 @@ class JSONFormatter(logging.Formatter):
             "src_port": record.src_port,
             "dst_ip": record.dst_ip,
             "dst_port": record.dst_port,
-            "message": record.getMessage()
+            "message": record.getMessage(),
+            "sensor_name": self.sensor_name,
+            "sensor_protocol": "ssh"
         }
         if hasattr(record, 'interactive'):
             log_record["interactive"] = record.interactive
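
Note: with `sensor_name`/`sensor_protocol` added, every JSON log line identifies its sensor. A self-contained sketch of a log call, assuming `format()` returns the serialized `log_record`; in the honeypot the src/dst attributes are injected by `ContextFilter`, so here they are supplied via `extra` to keep the snippet standalone:

    import logging

    demo = logging.getLogger("demo")
    handler = logging.StreamHandler()
    handler.setFormatter(JSONFormatter("sensor-east-1"))  # sensor name is illustrative
    demo.addHandler(handler)
    demo.info("Command executed", extra={
        "src_ip": "203.0.113.7", "src_port": 51422,  # normally set by ContextFilter
        "dst_ip": "198.51.100.2", "dst_port": 8022,
        "interactive": True,  # optional; picked up by the hasattr() check above
    })
    # -> {..., "sensor_name": "sensor-east-1", "sensor_protocol": "ssh", "interactive": true}
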
@@ -168,6 +176,7 @@ representative examples.
         judgement = "MALICIOUS"
 
     logger.info("Session summary", extra={"details": llm_response.content, "judgement": judgement})
+    server.summary_generated = True
 
 async def handle_client(process: asyncssh.SSHServerProcess, server: MySSHServer) -> None:
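
Note: `summary_generated` reads as an idempotency flag, so a session is summarized at most once even if both the normal exit path and a `ConnectionLost` handler reach the summary code. A sketch of the guard it enables (this call site is assumed, not shown in the diff):

    if not server.summary_generated:
        await session_summary(process, llm_config, with_message_history, server)
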
@@ -202,7 +211,7 @@ async def handle_client(process: asyncssh.SSHServerProcess, server: MySSHServer)
         # Handle interactive session
         llm_response = await with_message_history.ainvoke(
             {
-                "messages": [HumanMessage(content="ignore this message")],
+                "messages": [HumanMessage(content="")],
                 "username": process.get_extra_info('username'),
                 "interactive": True
             },
@@ -225,7 +234,7 @@ async def handle_client(process: asyncssh.SSHServerProcess, server: MySSHServer)
             },
             config=llm_config
         )
-        if llm_response.content == "XXX-END-OF-SESSION-XXX":
+        if llm_response.content == "YYY-END-OF-SESSION-YYY":
             await session_summary(process, llm_config, with_message_history, server)
             process.exit(0)
             return
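
Note: `with_message_history` is the session-scoped chain invoked above; its construction is outside this diff. A minimal sketch of the typical wiring, with every name assumed from the surrounding code:

    prompt = ChatPromptTemplate.from_messages([
        ("system", llm_system_prompt),              # assumed: loaded by get_prompts()
        MessagesPlaceholder(variable_name="messages"),
    ])
    chain = prompt | llm                            # llm comes from choose_llm()
    with_message_history = RunnableWithMessageHistory(
        chain,
        llm_get_session_history,                    # defined further down in this file
        input_messages_key="messages",
    )
    llm_config = {"configurable": {"session_id": str(uuid.uuid4())}}
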
@@ -269,7 +278,7 @@ class ContextFilter(logging.Filter):
         if task:
             task_name = task.get_name()
         else:
-            task_name = "-"
+            task_name = thread_local.__dict__.get('session_id', '-')
 
         record.src_ip = thread_local.__dict__.get('src_ip', '-')
         record.src_port = thread_local.__dict__.get('src_port', '-')
@@ -277,7 +286,7 @@ class ContextFilter(logging.Filter):
         record.dst_port = thread_local.__dict__.get('dst_port', '-')
 
         record.task_name = task_name
 
         return True
 
 def llm_get_session_history(session_id: str) -> BaseChatMessageHistory:
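
Note: `ContextFilter` only reads from `thread_local`; the seeding side is assumed to run in the connection handler before any logging. An illustrative sketch (`conn` is a stand-in for the asyncssh connection object):

    thread_local.session_id = str(uuid.uuid4())
    thread_local.src_ip, thread_local.src_port = conn.get_extra_info('peername')[:2]
    thread_local.dst_ip, thread_local.dst_port = conn.get_extra_info('sockname')[:2]
    logger.info("SSH connection received")  # records now carry src/dst and the session id
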
@@ -296,23 +305,43 @@ def get_user_accounts() -> dict:
     return accounts
 
-def choose_llm():
-    llm_provider_name = config['llm'].get("llm_provider", "openai")
+def choose_llm(llm_provider: Optional[str] = None, model_name: Optional[str] = None):
+    llm_provider_name = llm_provider or config['llm'].get("llm_provider", "openai")
     llm_provider_name = llm_provider_name.lower()
-    model_name = config['llm'].get("model_name", "gpt-3.5-turbo")
+    model_name = model_name or config['llm'].get("model_name", "gpt-4o-mini")
+
+    # Get temperature parameter from config, default to 0.2 if not specified
+    temperature = config['llm'].getfloat("temperature", 0.2)
 
     if llm_provider_name == 'openai':
         llm_model = ChatOpenAI(
-            model=model_name
+            model=model_name,
+            temperature=temperature
         )
+    elif llm_provider_name == 'azure':
+        llm_model = AzureChatOpenAI(
+            azure_deployment=config['llm'].get("azure_deployment"),
+            azure_endpoint=config['llm'].get("azure_endpoint"),
+            api_version=config['llm'].get("azure_api_version"),
+            model=config['llm'].get("model_name"),  # Ensure model_name is passed here
+            temperature=temperature
+        )
     elif llm_provider_name == 'ollama':
         llm_model = ChatOllama(
             model=model_name,
+            temperature=temperature
         )
     elif llm_provider_name == 'aws':
         llm_model = ChatBedrockConverse(
             model=model_name,
             region_name=config['llm'].get("aws_region", "us-east-1"),
-            credentials_profile_name=config['llm'].get("aws_credentials_profile", "default"))
+            credentials_profile_name=config['llm'].get("aws_credentials_profile", "default"),
+            temperature=temperature
+        )
     elif llm_provider_name == 'gemini':
         llm_model = ChatGoogleGenerativeAI(
             model=model_name,
+            temperature=temperature
         )
     else:
         raise ValueError(f"Invalid LLM provider {llm_provider_name}.")
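
Note: both parameters of the new `choose_llm()` signature are optional overrides, so existing zero-argument call sites keep working. Usage sketch (values illustrative):

    llm = choose_llm()                          # provider and model from config.ini
    llm = choose_llm("ollama", "llama3")        # explicit override, e.g. from CLI flags
    llm = choose_llm(model_name="gpt-4o-mini")  # provider from config, model overridden
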
@@ -347,25 +376,82 @@ def get_prompts(prompt: Optional[str], prompt_file: Optional[str]) -> dict:
 try:
     # Parse command line arguments
     parser = argparse.ArgumentParser(description='Start the SSH honeypot server.')
-    parser.add_argument('-c', '--config', type=str, default='config.ini', help='Path to the configuration file')
+    parser.add_argument('-c', '--config', type=str, default=None, help='Path to the configuration file')
     parser.add_argument('-p', '--prompt', type=str, help='The entire text of the prompt')
     parser.add_argument('-f', '--prompt-file', type=str, default='prompt.txt', help='Path to the prompt file')
     parser.add_argument('-l', '--llm-provider', type=str, help='The LLM provider to use')
     parser.add_argument('-m', '--model-name', type=str, help='The model name to use')
     parser.add_argument('-t', '--trimmer-max-tokens', type=int, help='The maximum number of tokens to send to the LLM backend in a single request')
     parser.add_argument('-s', '--system-prompt', type=str, help='System prompt for the LLM')
+    parser.add_argument('-r', '--temperature', type=float, help='Temperature parameter for controlling randomness in LLM responses (0.0-2.0)')
     parser.add_argument('-P', '--port', type=int, help='The port the SSH honeypot will listen on')
     parser.add_argument('-k', '--host-priv-key', type=str, help='The host key to use for the SSH server')
     parser.add_argument('-v', '--server-version-string', type=str, help='The server version string to send to clients')
     parser.add_argument('-L', '--log-file', type=str, help='The name of the file you wish to write the honeypot log to')
+    parser.add_argument('-S', '--sensor-name', type=str, help='The name of the sensor, used to identify this honeypot in the logs')
+    parser.add_argument('-u', '--user-account', action='append', help='User account in the form username=password. Can be repeated.')
     args = parser.parse_args()
 
-    # Check if the config file exists
-    if not os.path.exists(args.config):
-        print(f"Error: The specified config file '{args.config}' does not exist.", file=sys.stderr)
-        sys.exit(1)
+    # Determine which config file to load
+    config = ConfigParser()
+    if args.config is not None:
+        # User explicitly set a config file; error if it doesn't exist.
+        if not os.path.exists(args.config):
+            print(f"Error: The specified config file '{args.config}' does not exist.", file=sys.stderr)
+            sys.exit(1)
+        config.read(args.config)
+    else:
+        default_config = "config.ini"
+        if os.path.exists(default_config):
+            config.read(default_config)
+        else:
+            # Use defaults when no config file found.
+            config['honeypot'] = {'log_file': 'ssh_log.log', 'sensor_name': socket.gethostname()}
+            config['ssh'] = {'port': '8022', 'host_priv_key': 'ssh_host_key', 'server_version_string': 'SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.3'}
+            config['llm'] = {'llm_provider': 'openai', 'model_name': 'gpt-3.5-turbo', 'trimmer_max_tokens': '64000', 'temperature': '0.7', 'system_prompt': ''}
+            config['user_accounts'] = {}
+
+    # Override config values with command line arguments if provided
+    if args.llm_provider:
+        config['llm']['llm_provider'] = args.llm_provider
+    if args.model_name:
+        config['llm']['model_name'] = args.model_name
+    if args.trimmer_max_tokens:
+        config['llm']['trimmer_max_tokens'] = str(args.trimmer_max_tokens)
+    if args.system_prompt:
+        config['llm']['system_prompt'] = args.system_prompt
+    if args.temperature is not None:
+        config['llm']['temperature'] = str(args.temperature)
+    if args.port:
+        config['ssh']['port'] = str(args.port)
+    if args.host_priv_key:
+        config['ssh']['host_priv_key'] = args.host_priv_key
+    if args.server_version_string:
+        config['ssh']['server_version_string'] = args.server_version_string
+    if args.log_file:
+        config['honeypot']['log_file'] = args.log_file
+    if args.sensor_name:
+        config['honeypot']['sensor_name'] = args.sensor_name
+
+    # Merge command-line user accounts into the config
+    if args.user_account:
+        if 'user_accounts' not in config:
+            config['user_accounts'] = {}
+        for account in args.user_account:
+            if '=' in account:
+                key, value = account.split('=', 1)
+                config['user_accounts'][key.strip()] = value.strip()
+            else:
+                config['user_accounts'][account.strip()] = ''
+
+    # Read the user accounts from the configuration
+    accounts = get_user_accounts()
 
     # Always use UTC for logging
     logging.Formatter.formatTime = (lambda self, record, datefmt=None: datetime.datetime.fromtimestamp(record.created, datetime.timezone.utc).isoformat(sep="T", timespec="milliseconds"))
 
-    # Read our configuration file
-    config = ConfigParser()
-    config.read(args.config)
-
-    # Read the user accounts from the configuration file
-    accounts = get_user_accounts()
+    # Get the sensor name from the config or use the system's hostname
+    sensor_name = config['honeypot'].get('sensor_name', socket.gethostname())
 
     # Set up the honeypot logger
    logger = logging.getLogger(__name__)
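
Note: the precedence this block establishes is CLI flag over config file over built-in default. An end-to-end illustration (script name and values are illustrative):

    # python ssh_honeypot.py -l ollama -m llama3 -r 0.5 -u alice=s3cret -u bob
    # With no -c given, config.ini is read if present; the flags then override it:
    #   config['llm']['llm_provider'] == 'ollama'
    #   config['llm']['model_name']   == 'llama3'
    #   config['llm']['temperature']  == '0.5'   # stored as a string, read back via getfloat()
    #   dict(config['user_accounts']) == {'alice': 's3cret', 'bob': ''}
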
@@ -374,7 +460,7 @@ try:
     log_file_handler = logging.FileHandler(config['honeypot'].get("log_file", "ssh_log.log"))
     logger.addHandler(log_file_handler)
 
-    log_file_handler.setFormatter(JSONFormatter())
+    log_file_handler.setFormatter(JSONFormatter(sensor_name))
 
     f = ContextFilter()
     logger.addFilter(f)
@@ -385,7 +471,7 @@ try:
     llm_system_prompt = prompts["system_prompt"]
     llm_user_prompt = prompts["user_prompt"]
 
-    llm = choose_llm()
+    llm = choose_llm(config['llm'].get("llm_provider"), config['llm'].get("model_name"))
 
     llm_sessions = dict()
 