// memegoat/backend/src/common/middlewares/crawler-detection.middleware.ts

import { Injectable, Logger, NestMiddleware } from "@nestjs/common";
import type { NextFunction, Request, Response } from "express";

/**
 * Middleware that logs requests which look like automated scanning.
 *
 * It never blocks or alters a request: it registers a listener on the
 * response and, once the response has finished with a 404 status, emits a
 * warning if either the requested URL or the User-Agent header matches one of
 * the pattern lists below.
 */
@Injectable()
export class CrawlerDetectionMiddleware implements NestMiddleware {
	private readonly logger = new Logger("CrawlerDetection");

	/**
	 * URL fragments typically requested by vulnerability scanners.
	 * All patterns are case-insensitive so that probes such as `/phpMyAdmin/`
	 * or `/.ENV` are recognised as well.
	 */
	private readonly SUSPICIOUS_PATTERNS: readonly RegExp[] = [
		/\.env/i,
		/wp-admin/i,
		/wp-login/i,
		/\.git/i,
		// The URL still carries its query string, so ".php" may be followed by
		// "?", "#" or extra path info instead of the end of the string.
		/\.php(?:[/?#]|$)/i,
		/xmlrpc/i,
		/config/i,
		/setup/i,
		/wp-config/i,
		/_next/i,
		/install/i,
		/admin/i,
		/phpmyadmin/i,
		/sql/i,
		/backup/i,
		/db\./i,
		/backup\./i,
		/cgi-bin/i,
		/\.well-known\/security\.txt/i, // Legitimate, but frequently probed by scanners
	];

	/** User-Agent fragments of common bots, scripted HTTP clients and scanners. */
	private readonly BOT_USER_AGENTS: readonly RegExp[] = [
		/bot/i,
		/crawler/i,
		/spider/i,
		/python/i,
		/curl/i,
		/wget/i,
		/nmap/i,
		/nikto/i,
		/zgrab/i,
		/masscan/i,
	];

	/**
	 * Registers the post-response check and immediately passes control on.
	 *
	 * @param req  Incoming request; its method, IP, URL and User-Agent are captured up front.
	 * @param res  Response whose final status code decides whether the request is inspected.
	 * @param next Continues the middleware chain; always called.
	 */
	use(req: Request, res: Response, next: NextFunction): void {
		const { method, ip } = req;
		// `originalUrl` keeps the full URL even when the middleware is mounted
		// under a path prefix (Express strips the mount point from `url`).
		const url = req.originalUrl || req.url;
		const userAgent = req.get("user-agent") || "unknown";

		res.on("finish", () => {
			// Only requests that ended in "not found" are inspected.
			if (res.statusCode !== 404) {
				return;
			}

			const isSuspiciousPath = this.SUSPICIOUS_PATTERNS.some((pattern) =>
				pattern.test(url),
			);
			const isBotUserAgent = this.BOT_USER_AGENTS.some((pattern) =>
				pattern.test(userAgent),
			);

			if (isSuspiciousPath || isBotUserAgent) {
				this.logger.warn(
					`Potential crawler detected: [${ip}] ${method} ${url} - User-Agent: ${userAgent}`,
				);
				// A temporary IP ban (e.g. backed by Redis) could be added here.
			}
		});

		next();
	}
}