feat(middleware): add crawler detection middleware for suspicious requests

Introduce `CrawlerDetectionMiddleware` to identify and log potential crawlers or bots: requests that end in a 404 response and either target a suspicious path or send a bot-like user agent. The middleware is applied globally to all routes in `AppModule`.
This commit is contained in:
Mathis HERRIOT
2026-01-14 20:41:25 +01:00
parent 5665fcd98f
commit 5f2672021e
2 changed files with 74 additions and 2 deletions

View File

@@ -1,5 +1,5 @@
import { CacheModule } from "@nestjs/cache-manager";
import { Module } from "@nestjs/common";
import { MiddlewareConsumer, Module, NestModule } from "@nestjs/common";
import { ConfigModule, ConfigService } from "@nestjs/config";
import { ScheduleModule } from "@nestjs/schedule";
import { ThrottlerModule } from "@nestjs/throttler";
@@ -10,6 +10,7 @@ import { AppService } from "./app.service";
import { AuthModule } from "./auth/auth.module";
import { CategoriesModule } from "./categories/categories.module";
import { CommonModule } from "./common/common.module";
import { CrawlerDetectionMiddleware } from "./common/middlewares/crawler-detection.middleware";
import { validateEnv } from "./config/env.schema";
import { ContentsModule } from "./contents/contents.module";
import { CryptoModule } from "./crypto/crypto.module";
@@ -71,4 +72,8 @@ import { UsersModule } from "./users/users.module";
controllers: [AppController, HealthController],
providers: [AppService],
})
export class AppModule {}
export class AppModule implements NestModule {
/**
 * Registers the application-wide middleware.
 *
 * @param consumer - Nest middleware consumer used to bind middleware to routes.
 */
configure(consumer: MiddlewareConsumer) {
// Bind crawler detection to the "*" route pattern so it observes every request.
// NOTE(review): how the "*" wildcard is interpreted depends on the NestJS /
// HTTP adapter version in use — confirm against the installed version.
consumer.apply(CrawlerDetectionMiddleware).forRoutes("*");
}
}

View File

@@ -0,0 +1,67 @@
import { Injectable, Logger, NestMiddleware } from "@nestjs/common";
import type { NextFunction, Request, Response } from "express";
/**
 * Logs requests that look like automated scanning.
 *
 * A warning is emitted once the response has finished, and only when the
 * response status is 404 AND either the requested URL matches one of the
 * suspicious path patterns or the User-Agent matches one of the bot patterns.
 * The middleware never blocks or alters the request; it always calls `next()`.
 */
@Injectable()
export class CrawlerDetectionMiddleware implements NestMiddleware {
  private readonly logger = new Logger("CrawlerDetection");

  /** Upper bound on the length of any client-supplied value written to the log. */
  private readonly MAX_LOGGED_LENGTH = 512;

  /** URL fragments commonly probed by vulnerability scanners. */
  private readonly SUSPICIOUS_PATTERNS = [
    /\.env/,
    /wp-admin/,
    /wp-login/,
    /\.git/,
    /\.php$/,
    /xmlrpc/,
    /config/,
    /setup/,
    /wp-config/,
    /_next/,
    /install/,
    /admin/,
    /phpmyadmin/,
    /sql/,
    /backup/,
    /db\./,
    /backup\./,
    /cgi-bin/,
    /\.well-known\/security\.txt/, // Legitimate, but frequently probed by scanners
  ];

  /** User-Agent fragments (case-insensitive) typical of bots and scanning tools. */
  private readonly BOT_USER_AGENTS = [
    /bot/i,
    /crawler/i,
    /spider/i,
    /python/i,
    /curl/i,
    /wget/i,
    /nmap/i,
    /nikto/i,
    /zgrab/i,
    /masscan/i,
  ];

  /**
   * Registers a `finish` listener that logs suspicious 404 responses, then
   * hands control to the next handler.
   *
   * @param req - Incoming Express request.
   * @param res - Outgoing Express response; inspected only after it finishes.
   * @param next - Continuation invoked unconditionally.
   */
  use(req: Request, res: Response, next: NextFunction): void {
    const { method, ip } = req;
    // Express rewrites `req.url` relative to the mount point of mounted
    // middleware; `originalUrl` always holds the full URL the client requested.
    const url = req.originalUrl || req.url;
    const userAgent = req.get("user-agent") || "unknown";

    // The status code is only final once the response has been sent.
    res.on("finish", () => {
      if (res.statusCode !== 404) {
        return;
      }
      if (!this.isSuspiciousUrl(url) && !this.isBotUserAgent(userAgent)) {
        return;
      }

      // NOTE(review): `req.ip` is the proxy address unless Express
      // "trust proxy" is configured — confirm for this deployment.
      this.logger.warn(
        `Potential crawler detected: [${ip}] ${method} ${this.sanitizeForLog(url)} - User-Agent: ${this.sanitizeForLog(userAgent)}`,
      );
      // A temporary IP ban (e.g. backed by Redis) could be added here.
    });

    next();
  }

  /**
   * Tells whether the URL matches any suspicious pattern. Patterns are tried
   * against the path alone (so `$`-anchored patterns such as `\.php$` still
   * match when a query string follows) and against the full URL.
   */
  private isSuspiciousUrl(url: string): boolean {
    const queryStart = url.indexOf("?");
    const path = queryStart === -1 ? url : url.slice(0, queryStart);
    return this.SUSPICIOUS_PATTERNS.some(
      (pattern) => pattern.test(path) || pattern.test(url),
    );
  }

  /** Tells whether the User-Agent matches any known bot/scanner pattern. */
  private isBotUserAgent(userAgent: string): boolean {
    return this.BOT_USER_AGENTS.some((pattern) => pattern.test(userAgent));
  }

  /**
   * Makes a client-controlled value safe to embed in a single log line:
   * control characters are replaced with "?" and the value is length-capped.
   */
  private sanitizeForLog(value: string): string {
    const cleaned = value.replace(/\p{Cc}/gu, "?");
    return cleaned.length > this.MAX_LOGGED_LENGTH
      ? `${cleaned.slice(0, this.MAX_LOGGED_LENGTH)}...`
      : cleaned;
  }
}