diff --git a/backend/src/app.module.ts b/backend/src/app.module.ts
index e5085d4..41608f7 100644
--- a/backend/src/app.module.ts
+++ b/backend/src/app.module.ts
@@ -1,5 +1,5 @@
 import { CacheModule } from "@nestjs/cache-manager";
-import { Module } from "@nestjs/common";
+import { MiddlewareConsumer, Module, NestModule } from "@nestjs/common";
 import { ConfigModule, ConfigService } from "@nestjs/config";
 import { ScheduleModule } from "@nestjs/schedule";
 import { ThrottlerModule } from "@nestjs/throttler";
@@ -10,6 +10,7 @@ import { AppService } from "./app.service";
 import { AuthModule } from "./auth/auth.module";
 import { CategoriesModule } from "./categories/categories.module";
 import { CommonModule } from "./common/common.module";
+import { CrawlerDetectionMiddleware } from "./common/middlewares/crawler-detection.middleware";
 import { validateEnv } from "./config/env.schema";
 import { ContentsModule } from "./contents/contents.module";
 import { CryptoModule } from "./crypto/crypto.module";
@@ -71,4 +72,8 @@ import { UsersModule } from "./users/users.module";
   controllers: [AppController, HealthController],
   providers: [AppService],
 })
-export class AppModule {}
+export class AppModule implements NestModule {
+  configure(consumer: MiddlewareConsumer) {
+    consumer.apply(CrawlerDetectionMiddleware).forRoutes("*");
+  }
+}
diff --git a/backend/src/common/middlewares/crawler-detection.middleware.ts b/backend/src/common/middlewares/crawler-detection.middleware.ts
new file mode 100644
index 0000000..01149d1
--- /dev/null
+++ b/backend/src/common/middlewares/crawler-detection.middleware.ts
@@ -0,0 +1,67 @@
+import { Injectable, Logger, NestMiddleware } from "@nestjs/common";
+import type { NextFunction, Request, Response } from "express";
+
+@Injectable()
+export class CrawlerDetectionMiddleware implements NestMiddleware {
+  private readonly logger = new Logger("CrawlerDetection");
+
+  private readonly SUSPICIOUS_PATTERNS = [
+    /\.env/,
+    /wp-admin/,
+    /wp-login/,
+    /\.git/,
+    /\.php$/,
+    /xmlrpc/,
+    /config/,
+    /setup/,
+    /wp-config/,
+    /_next/,
+    /install/,
+    /admin/,
+    /phpmyadmin/,
+    /sql/,
+    /backup/,
+    /db\./,
+    /backup\./,
+    /cgi-bin/,
+    /\.well-known\/security\.txt/, // Legitimate file, but frequently probed by scanners
+  ];
+
+  private readonly BOT_USER_AGENTS = [
+    /bot/i,
+    /crawler/i,
+    /spider/i,
+    /python/i,
+    /curl/i,
+    /wget/i,
+    /nmap/i,
+    /nikto/i,
+    /zgrab/i,
+    /masscan/i,
+  ];
+
+  use(req: Request, res: Response, next: NextFunction) {
+    const { method, url, ip } = req;
+    const userAgent = req.get("user-agent") || "unknown";
+
+    res.on("finish", () => {
+      if (res.statusCode === 404) {
+        const isSuspiciousPath = this.SUSPICIOUS_PATTERNS.some((pattern) =>
+          pattern.test(url),
+        );
+        const isBotUserAgent = this.BOT_USER_AGENTS.some((pattern) =>
+          pattern.test(userAgent),
+        );
+
+        if (isSuspiciousPath || isBotUserAgent) {
+          this.logger.warn(
+            `Potential crawler detected: [${ip}] ${method} ${url} - User-Agent: ${userAgent}`,
+          );
+          // A temporary IP ban via Redis could be added here
+        }
+      }
+    });
+
+    next();
+  }
+}
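On the follow-up idea in the final comment: since `CacheModule` from `@nestjs/cache-manager` is already wired into `AppModule`, the temporary IP ban could go through the cache manager rather than a direct Redis client (assuming the cache store is Redis-backed). A minimal sketch of what that could look like — the `CrawlerBanMiddleware` name, strike threshold, TTL, and key prefix are all illustrative, not part of this diff:

```ts
import { CACHE_MANAGER } from "@nestjs/cache-manager";
import { Inject, Injectable, Logger, NestMiddleware } from "@nestjs/common";
import type { Cache } from "cache-manager";
import type { NextFunction, Request, Response } from "express";

@Injectable()
export class CrawlerBanMiddleware implements NestMiddleware {
  private readonly logger = new Logger("CrawlerBan");

  // Illustrative values: 5 strikes within the TTL window => 15-minute ban.
  private readonly MAX_STRIKES = 5;
  private readonly BAN_TTL_MS = 15 * 60 * 1000;

  constructor(@Inject(CACHE_MANAGER) private readonly cache: Cache) {}

  async use(req: Request, res: Response, next: NextFunction) {
    const key = `crawler-strikes:${req.ip}`;
    const strikes = (await this.cache.get<number>(key)) ?? 0;

    // Short-circuit requests from IPs that have exceeded the threshold.
    if (strikes >= this.MAX_STRIKES) {
      res.status(403).send();
      return;
    }

    res.on("finish", () => {
      // In practice this would run from the detection branch in
      // CrawlerDetectionMiddleware (suspicious path or bot user-agent),
      // not on every 404; writes are fire-and-forget, errors only logged.
      if (res.statusCode === 404) {
        this.cache
          .set(key, strikes + 1, this.BAN_TTL_MS)
          .catch((err) => this.logger.error(err));
      }
    });

    next();
  }
}
```

Note the read-modify-write on the strike counter is racy under concurrent requests; a production version would more likely issue an atomic `INCR` + `EXPIRE` on the Redis client directly.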
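Since the middleware only logs, a quick way to sanity-check the detection logic is a unit test with fake request/response objects and a spy on Nest's `Logger` (Jest assumed, as in the default Nest scaffold; the fakes and values below are illustrative):

```ts
import { Logger } from "@nestjs/common";
import { EventEmitter } from "node:events";
import { CrawlerDetectionMiddleware } from "./crawler-detection.middleware";

it("warns on a scanner-like 404", () => {
  const warn = jest.spyOn(Logger.prototype, "warn").mockImplementation();
  const middleware = new CrawlerDetectionMiddleware();

  // Minimal fakes covering only the fields the middleware reads.
  const req = {
    method: "GET",
    url: "/wp-login.php",
    ip: "203.0.113.7",
    get: () => "zgrab/0.x",
  } as any;
  const res = Object.assign(new EventEmitter(), { statusCode: 404 }) as any;

  middleware.use(req, res, () => {});
  res.emit("finish"); // simulate the response completing as a 404

  expect(warn).toHaveBeenCalledWith(
    expect.stringContaining("Potential crawler detected"),
  );
  warn.mockRestore();
});
```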