Introduce `CrawlerDetectionMiddleware` to identify and log potential crawlers or bots that access suspicious paths or use bot-like user agents. The middleware is applied globally to all routes in `AppModule`.
68 lines
1.4 KiB
TypeScript
68 lines
1.4 KiB
TypeScript
import { Injectable, Logger, NestMiddleware } from "@nestjs/common";
|
|
import type { NextFunction, Request, Response } from "express";
|
|
|
|
/**
 * Middleware that logs a warning when a request that ended in a 404 looks
 * like automated scanning: either the requested URL matches one of the
 * well-known probe paths, or the User-Agent header looks like a bot or a
 * scripted client.
 *
 * The middleware never blocks or alters the request; it only observes the
 * final status code once the response has been sent.
 */
@Injectable()
export class CrawlerDetectionMiddleware implements NestMiddleware {
  private readonly logger = new Logger("CrawlerDetection");

  /**
   * URL fragments commonly requested by vulnerability scanners.
   * None of the patterns use the `g`/`y` flags, so `RegExp.test` is
   * stateless and the instances can safely be shared across requests.
   */
  private readonly SUSPICIOUS_PATTERNS: readonly RegExp[] = [
    /\.env/,
    /wp-admin/,
    /wp-login/,
    /\.git/,
    // The tested URL includes the query string, so also accept ".php"
    // directly followed by "?" (e.g. "/shell.php?cmd=id").
    /\.php(?:\?|$)/,
    /xmlrpc/,
    /config/,
    /setup/,
    /wp-config/,
    /_next/,
    /install/,
    /admin/,
    /phpmyadmin/,
    /sql/,
    /backup/,
    /db\./,
    /backup\./,
    /cgi-bin/,
    /\.well-known\/security\.txt/, // Legitimate, but frequently probed by scanners
  ];

  /** Case-insensitive User-Agent fragments of bots and scripted clients. */
  private readonly BOT_USER_AGENTS: readonly RegExp[] = [
    /bot/i,
    /crawler/i,
    /spider/i,
    /python/i,
    /curl/i,
    /wget/i,
    /nmap/i,
    /nikto/i,
    /zgrab/i,
    /masscan/i,
  ];

  /**
   * Registers a one-shot inspection of the response and immediately hands
   * control to the next handler.
   *
   * @param req  Incoming Express request.
   * @param res  Express response; its `finish` event triggers the check.
   * @param next Continuation callback, always invoked synchronously.
   */
  use(req: Request, res: Response, next: NextFunction): void {
    const { method } = req;
    // Express rewrites `req.url` relative to the mount point of middleware
    // registered through `use()`, whereas `originalUrl` always keeps the
    // full requested URL. Matching on `url` can therefore miss every
    // path pattern.
    const url = req.originalUrl || req.url;
    // `req.ip` may be undefined (e.g. the socket is already destroyed).
    const ip = req.ip ?? "unknown";
    // `||` on purpose: an empty header is treated like a missing one.
    const userAgent = req.get("user-agent") || "unknown";

    // The status code is only final once the response has been flushed.
    res.on("finish", () => {
      if (res.statusCode !== 404) {
        return;
      }

      const isSuspiciousPath = this.SUSPICIOUS_PATTERNS.some((pattern) =>
        pattern.test(url),
      );
      const isBotUserAgent = this.BOT_USER_AGENTS.some((pattern) =>
        pattern.test(userAgent),
      );

      if (isSuspiciousPath || isBotUserAgent) {
        this.logger.warn(
          `Potential crawler detected: [${ip}] ${method} ${url} - User-Agent: ${userAgent}`,
        );
        // A temporary IP ban (e.g. backed by Redis) could be added here.
      }
    });

    next();
  }
}
|