From 527e9b121922196a35ed4d77fdadcbb2a178794b Mon Sep 17 00:00:00 2001
From: Patrick Plate
Date: Mon, 15 Jun 2026 17:30:28 +0200
Subject: [PATCH] =?UTF-8?q?feat(sprint10):=20Phase=202=20=E2=80=94=20Payme?=
 =?UTF-8?q?nt=20matching=20engine=20with=20confidence=20scoring?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../service/bankimport/MatchResult.java       |  36 ++
 .../bankimport/MemberMatchContext.java        |  36 ++
 .../bankimport/PaymentMatchingService.java    | 506 ++++++++++++++++++
 3 files changed, 578 insertions(+)
 create mode 100644 cannamanage-service/src/main/java/de/cannamanage/service/bankimport/MatchResult.java
 create mode 100644 cannamanage-service/src/main/java/de/cannamanage/service/bankimport/MemberMatchContext.java
 create mode 100644 cannamanage-service/src/main/java/de/cannamanage/service/bankimport/PaymentMatchingService.java

diff --git a/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/MatchResult.java b/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/MatchResult.java
new file mode 100644
index 0000000..c320260
--- /dev/null
+++ b/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/MatchResult.java
@@ -0,0 +1,36 @@
+package de.cannamanage.service.bankimport;
+
+import de.cannamanage.domain.enums.MatchStatus;
+
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * Sprint 10 — Per-transaction outcome of {@link PaymentMatchingService}.
+ * <p>
+ * Exposes the chosen member, the aggregated confidence (0–100), the resulting
+ * {@link MatchStatus} classification and a per-criterion {@code scoreBreakdown}
+ * for diagnostics / UI hover-tooltips. Returned by
+ * {@link PaymentMatchingService#scoreOne(ParsedTransaction, java.util.List)} for unit
+ * tests; the public {@code matchTransactions} API folds these results back into
+ * {@code BankTransaction} entities for the orchestrator.
+ *
+ * @param matchedMemberId   chosen member, {@code null} when {@code classification == UNMATCHED}
+ * @param matchedMemberName chosen member's full name for log/UI display, {@code null} when unmatched
+ * @param confidence        0–100 aggregated weighted score
+ * @param classification    {@link MatchStatus#MATCHED} (≥90), {@link MatchStatus#SUGGESTED} (60–89)
+ *                          or {@link MatchStatus#UNMATCHED} (&lt;60)
+ * @param scoreBreakdown    criterion → individual 0–100 score; useful for explaining a match in the UI
+ */
+public record MatchResult(
+        UUID matchedMemberId,
+        String matchedMemberName,
+        int confidence,
+        MatchStatus classification,
+        Map<String, Integer> scoreBreakdown
+) {
+
+    public static MatchResult unmatched() {
+        return new MatchResult(null, null, 0, MatchStatus.UNMATCHED, Map.of());
+    }
+}
diff --git a/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/MemberMatchContext.java b/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/MemberMatchContext.java
new file mode 100644
index 0000000..60b9cd3
--- /dev/null
+++ b/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/MemberMatchContext.java
@@ -0,0 +1,36 @@
+package de.cannamanage.service.bankimport;
+
+import java.util.UUID;
+
+/**
+ * Sprint 10 — Pre-computed matching context for one member, prepared once per
+ * import run and re-used across every transaction in
+ * {@link PaymentMatchingService#matchTransactions}.
+ * <p>
+ * Holding the {@code expectedAmountCents} on the context avoids re-querying
+ * the fee schedule for every (transaction × member) pair and unlocks the
+ * early-exit optimisation when the bank amount deviates by more than 20%
+ * from the expected fee.
+ *
+ * @param memberId            member primary key
+ * @param memberNumber        the {@code Member.membershipNumber} (e.g. {@code "M-0042"})
+ * @param fullName            "{firstName} {lastName}" — already pre-joined for scoring
+ * @param iban                member IBAN if BANK_DATA consent was granted, otherwise {@code null}
+ * @param expectedAmountCents fee schedule amount valid at the import's booking-date context,
+ *                            or {@code -1} if the member has no active fee assignment for that period
+ */
+record MemberMatchContext(
+        UUID memberId,
+        String memberNumber,
+        String fullName,
+        String iban,
+        int expectedAmountCents
+) {
+
+    /** Sentinel value indicating the member has no fee assignment for the matching period. */
+    static final int NO_EXPECTED_AMOUNT = -1;
+
+    boolean hasExpectedAmount() {
+        return expectedAmountCents > 0;
+    }
+}
diff --git a/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/PaymentMatchingService.java b/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/PaymentMatchingService.java
new file mode 100644
index 0000000..6505c45
--- /dev/null
+++ b/cannamanage-service/src/main/java/de/cannamanage/service/bankimport/PaymentMatchingService.java
@@ -0,0 +1,506 @@
+package de.cannamanage.service.bankimport;
+
+import de.cannamanage.domain.entity.BankTransaction;
+import de.cannamanage.domain.entity.FeeSchedule;
+import de.cannamanage.domain.entity.Member;
+import de.cannamanage.domain.entity.MemberFeeAssignment;
+import de.cannamanage.domain.enums.FeeInterval;
+import de.cannamanage.domain.enums.MatchStatus;
+import de.cannamanage.domain.enums.MemberStatus;
+import de.cannamanage.service.repository.FeeScheduleRepository;
+import de.cannamanage.service.repository.MemberFeeAssignmentRepository;
+import de.cannamanage.service.repository.MemberRepository;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Service;
+
+import java.time.LocalDate;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.UUID;
+
+/**
+ * Sprint 10 — Deterministic, in-memory matching engine that pairs parsed
+ * bank-statement transactions to club members.
+ * <p>
+ * The algorithm is a weighted-confidence model with four criteria:
+ * <table>
+ *   <caption>Scoring weights (sum = 1.00)</caption>
+ *   <tr><th>Criterion</th><th>Weight</th><th>Source</th></tr>
+ *   <tr><td>Amount equals expected fee (±20%)</td><td>0.35</td><td>{@link FeeSchedule}</td></tr>
+ *   <tr><td>Member number found in {@code Verwendungszweck}</td><td>0.30</td><td>{@link Member#getMembershipNumber()}</td></tr>
+ *   <tr><td>Counterparty name matches member name</td><td>0.20</td><td>{@link Member#getFirstName()} / {@link Member#getLastName()}</td></tr>
+ *   <tr><td>IBAN exact match (after BANK_DATA consent)</td><td>0.15</td><td>{@link Member#getIban()}</td></tr>
+ * </table>
+ * Classification thresholds: ≥ 90 → MATCHED (admin pre-selected),
+ * 60–89 → SUGGESTED (admin review), &lt; 60 → UNMATCHED.
+ * <p>
+ * Double-payment safety: if the same member is the best match for
+ * two or more transactions in one import, all of them are downgraded to
+ * {@link MatchStatus#SUGGESTED} (even at confidence ≥ 90) so the admin
+ * must decide which is the real payment and which is a duplicate or a
+ * payment for a different period.
+ * <p>
+ * Performance: fee amounts are pre-computed once per matching run,
+ * keyed by member id, against the import's booking-date context
+ * (the most frequent booking date in the batch — so a January import of
+ * December transactions matches December's fee schedule, not today's).
+ * Per-transaction we early-exit when the amount deviates by more than 20%
+ * AND no membership number appears in the reference text, avoiding the
+ * expensive name/IBAN comparisons for the vast majority of pairs.
+ * <p>
+ * The service is stateless and thread-safe; all state lives in
+ * the parameters of {@link #matchTransactions(List, UUID, UUID)}.
+ * It performs no persistence — the returned {@link BankTransaction}
+ * entities are detached and must be persisted by the calling orchestrator
+ * (see {@code BankImportService}, Phase 3).
+ */
+@Service
+public class PaymentMatchingService {
+
+    private static final Logger log = LoggerFactory.getLogger(PaymentMatchingService.class);
+
+    // --- Weights (sum = 1.00) — Plan v3 §3.1 ---
+    static final double W_AMOUNT = 0.35;
+    static final double W_MEMBER_NUMBER = 0.30;
+    static final double W_NAME = 0.20;
+    static final double W_IBAN = 0.15;
+
+    // --- Classification thresholds (aggregate score 0–100) ---
+    static final int THRESHOLD_AUTO = 90;
+    static final int THRESHOLD_SUGGEST = 60;
+
+    /** Amount deviation tolerance for "near match" (50% scoring). */
+    private static final double AMOUNT_DEVIATION_THRESHOLD = 0.20;
+
+    /** Minimum digit count of a member number that may match as a numeric substring. */
+    private static final int MIN_NUMERIC_MATCH_LENGTH = 3;
+
+    /** Threshold (transactions) above which timing diagnostics are logged at INFO. */
+    private static final int PERF_LOG_THRESHOLD = 500;
+
+    private final MemberRepository memberRepository;
+    private final MemberFeeAssignmentRepository feeAssignmentRepository;
+    private final FeeScheduleRepository feeScheduleRepository;
+
+    public PaymentMatchingService(MemberRepository memberRepository,
+                                  MemberFeeAssignmentRepository feeAssignmentRepository,
+                                  FeeScheduleRepository feeScheduleRepository) {
+        this.memberRepository = memberRepository;
+        this.feeAssignmentRepository = feeAssignmentRepository;
+        this.feeScheduleRepository = feeScheduleRepository;
+    }
+
+    // ------------------------------------------------------------------
+    // Public API
+    // ------------------------------------------------------------------
+
+    /**
+     * Matches a batch of parsed transactions against the club's active members.
+     * <p>
+     * Outgoing transactions (negative {@code amountCents}) are returned with
+     * {@link MatchStatus#UNMATCHED} — they represent expenses and are handled
+     * by a separate categorisation flow.
+     *
+     * @param parsedTransactions parser output (positive = incoming, negative = outgoing)
+     * @param clubId             tenant scope; only active members of this club are considered
+     * @param sessionId          the {@link de.cannamanage.domain.entity.BankImportSession}
+     *                           the returned transactions will belong to
+     * @return detached {@link BankTransaction} entities with
+     *         {@code matchStatus}, {@code matchConfidence} and
+     *         {@code matchedMemberId} populated where applicable —
+     *         not yet persisted
+     */
+    public List<BankTransaction> matchTransactions(List<ParsedTransaction> parsedTransactions,
+                                                   UUID clubId,
+                                                   UUID sessionId) {
+        Objects.requireNonNull(parsedTransactions, "parsedTransactions");
+        Objects.requireNonNull(clubId, "clubId");
+        Objects.requireNonNull(sessionId, "sessionId");
+
+        long startNanos = System.nanoTime();
+
+        // 1. Load all ACTIVE members for the club (single query)
+        List<Member> activeMembers = memberRepository.findByClubIdAndStatus(clubId, MemberStatus.ACTIVE);
+
+        // 2. Determine the booking-date context for fee lookup (most frequent date in the batch)
+        LocalDate bookingDateContext = pickBookingDateContext(parsedTransactions);
+
+        // 3. Pre-compute fee amounts once
+        Map<UUID, Integer> expectedAmounts = precomputeFeeAmounts(activeMembers, clubId, bookingDateContext);
+
+        // 4. Build per-member matching contexts (stable order for deterministic tie-break)
+        List<MemberMatchContext> contexts = buildContexts(activeMembers, expectedAmounts);
+
+        // 5. Score every transaction
+        List<BankTransaction> result = new ArrayList<>(parsedTransactions.size());
+        Map<UUID, List<Integer>> memberHits = new HashMap<>(); // memberId → indexes into result that matched it
+
+        for (ParsedTransaction parsed : parsedTransactions) {
+            BankTransaction tx = toEntity(parsed, sessionId, clubId);
+
+            if (parsed.amountCents() <= 0) {
+                // Outgoing or zero — never a member payment
+                tx.setMatchStatus(MatchStatus.UNMATCHED);
+                result.add(tx);
+                continue;
+            }
+
+            MatchResult mr = scoreOne(parsed, contexts);
+            applyMatchResult(tx, mr);
+            int idx = result.size();
+            result.add(tx);
+
+            if (mr.matchedMemberId() != null
+                    && (mr.classification() == MatchStatus.MATCHED
+                    || mr.classification() == MatchStatus.SUGGESTED)) {
+                memberHits.computeIfAbsent(mr.matchedMemberId(), k -> new ArrayList<>()).add(idx);
+            }
+        }
+
+        // 6. Double-payment safety: downgrade ALL hits for any member that matched 2+ transactions
+        int downgrades = 0;
+        for (Map.Entry<UUID, List<Integer>> e : memberHits.entrySet()) {
+            if (e.getValue().size() < 2) continue;
+            for (int idx : e.getValue()) {
+                BankTransaction tx = result.get(idx);
+                if (tx.getMatchStatus() == MatchStatus.MATCHED) {
+                    tx.setMatchStatus(MatchStatus.SUGGESTED);
+                    downgrades++;
+                }
+            }
+        }
+
+        long durationMs = (System.nanoTime() - startNanos) / 1_000_000L;
+        if (parsedTransactions.size() >= PERF_LOG_THRESHOLD) {
+            log.info("Matching {} txns × {} members in {} ms ({} double-payment downgrades)",
+                    parsedTransactions.size(), contexts.size(), durationMs, downgrades);
+        } else if (log.isDebugEnabled()) {
+            log.debug("Matching {} txns × {} members in {} ms ({} double-payment downgrades)",
+                    parsedTransactions.size(), contexts.size(), durationMs, downgrades);
+        }
+
+        return result;
+    }
+
+    // ------------------------------------------------------------------
+    // Internal — context building
+    // ------------------------------------------------------------------
+
+    private List<MemberMatchContext> buildContexts(List<Member> members, Map<UUID, Integer> expectedAmounts) {
+        List<MemberMatchContext> ctx = new ArrayList<>(members.size());
+        for (Member m : members) {
+            String fullName = ((nullToEmpty(m.getFirstName()) + " " + nullToEmpty(m.getLastName())).trim());
+            int expected = expectedAmounts.getOrDefault(m.getId(), MemberMatchContext.NO_EXPECTED_AMOUNT);
+            ctx.add(new MemberMatchContext(
+                    m.getId(),
+                    m.getMembershipNumber(),
+                    fullName,
+                    m.getIban(),
+                    expected
+            ));
+        }
+        // Deterministic tie-break: stable order by member id
+        ctx.sort(Comparator.comparing(MemberMatchContext::memberId));
+        return ctx;
+    }
+
+    /**
+     * Picks the most frequent {@code bookingDate} in the batch as the
+     * temporal context for fee-assignment lookup. Ties resolve to the
+     * earliest date (favouring the older period in mixed batches).
+     * Empty batches default to {@link LocalDate#now()}.
+     */
+    static LocalDate pickBookingDateContext(List<ParsedTransaction> txns) {
+        if (txns == null || txns.isEmpty()) return LocalDate.now();
+        Map<LocalDate, Integer> freq = new HashMap<>();
+        for (ParsedTransaction t : txns) {
+            if (t.bookingDate() != null) {
+                freq.merge(t.bookingDate(), 1, Integer::sum);
+            }
+        }
+        if (freq.isEmpty()) return LocalDate.now();
+        return freq.entrySet().stream()
+                .sorted((a, b) -> {
+                    int cmp = Integer.compare(b.getValue(), a.getValue());
+                    return cmp != 0 ? cmp : a.getKey().compareTo(b.getKey());
+                })
+                .findFirst()
+                .map(Map.Entry::getKey)
+                .orElse(LocalDate.now());
+    }
+
+    /**
+     * Pre-computes the expected per-billing-period fee in cents per member,
+     * using the assignment valid at {@code bookingDateContext} (not today).
+     * Members without a valid assignment or schedule amount are absent from the map.
+     * <p>
+     * No interval conversion takes place: whatever the schedule's interval,
+     * the value is the amount recorded on the {@link FeeSchedule}
+     * itself — the bank transaction is expected to match the schedule's
+     * raw {@code amountCents}, not a derived monthly figure.
+     */
+    Map<UUID, Integer> precomputeFeeAmounts(List<Member> members, UUID clubId, LocalDate bookingDateContext) {
+        // Bulk-load fee assignments for the club, then index by member
+        List<MemberFeeAssignment> allAssignments = feeAssignmentRepository.findByClubId(clubId);
+        Map<UUID, List<MemberFeeAssignment>> byMember = new HashMap<>();
+        for (MemberFeeAssignment a : allAssignments) {
+            byMember.computeIfAbsent(a.getMemberId(), k -> new ArrayList<>()).add(a);
+        }
+
+        // Bulk-load fee schedules for the club
+        Map<UUID, FeeSchedule> schedulesById = new HashMap<>();
+        for (FeeSchedule s : feeScheduleRepository.findByClubId(clubId)) {
+            schedulesById.put(s.getId(), s);
+        }
+
+        Map<UUID, Integer> expected = new HashMap<>(members.size());
+        for (Member m : members) {
+            List<MemberFeeAssignment> assignments = byMember.get(m.getId());
+            if (assignments == null) continue;
+            Optional<MemberFeeAssignment> active = assignments.stream()
+                    .filter(a -> isValidAt(a, bookingDateContext))
+                    .findFirst();
+            if (active.isEmpty()) continue;
+            FeeSchedule fs = schedulesById.get(active.get().getFeeScheduleId());
+            if (fs == null || fs.getAmountCents() == null) continue;
+            expected.put(m.getId(), fs.getAmountCents());
+        }
+        return expected;
+    }
+
+    private static boolean isValidAt(MemberFeeAssignment a, LocalDate date) {
+        if (a.getValidFrom() != null && a.getValidFrom().isAfter(date)) return false;
+        if (a.getValidTo() != null && !a.getValidTo().isAfter(date)) return false; // validTo is exclusive
+        return true;
+    }
+
+    // ------------------------------------------------------------------
+    // Internal — scoring (one transaction)
+    // ------------------------------------------------------------------
+
+    /**
+     * Scores a parsed transaction against every member context and returns
+     * the best candidate. Package-private for testing.
+     */
+    MatchResult scoreOne(ParsedTransaction txn, List<MemberMatchContext> contexts) {
+        if (txn.amountCents() <= 0) return MatchResult.unmatched();
+
+        String normalizedReference = normalize(txn.referenceText());
+        String normalizedCounterparty = normalize(txn.counterpartyName());
+        String normalizedTxnIban = normalizeIban(txn.counterpartyIban());
+
+        MemberMatchContext best = null;
+        int bestScore = -1;
+        Map<String, Integer> bestBreakdown = Map.of();
+
+        for (MemberMatchContext ctx : contexts) {
+            // Early-exit: if amount deviation > 20% AND no membership number found in reference, skip.
+            boolean amountPlausible = isAmountPlausible(txn.amountCents(), ctx.expectedAmountCents());
+            boolean memberNumberHit = ctx.memberNumber() != null
+                    && containsMemberNumber(normalizedReference, ctx.memberNumber());
+            if (!amountPlausible && !memberNumberHit) {
+                continue;
+            }
+
+            int amountScore = scoreAmount(txn.amountCents(), ctx.expectedAmountCents());
+            int memberNumberScore = scoreMemberNumber(normalizedReference, ctx.memberNumber());
+            int nameScore = scoreName(normalizedReference, normalizedCounterparty, normalize(ctx.fullName()));
+            int ibanScore = scoreIban(normalizedTxnIban, normalizeIban(ctx.iban()));
+
+            double weighted = W_AMOUNT * amountScore
+                    + W_MEMBER_NUMBER * memberNumberScore
+                    + W_NAME * nameScore
+                    + W_IBAN * ibanScore;
+            int total = (int) Math.round(weighted);
+
+            if (total > bestScore) {
+                bestScore = total;
+                best = ctx;
+                bestBreakdown = orderedBreakdown(amountScore, memberNumberScore, nameScore, ibanScore);
+            }
+        }
+
+        if (best == null || bestScore < THRESHOLD_SUGGEST) {
+            return MatchResult.unmatched();
+        }
+
+        MatchStatus classification = bestScore >= THRESHOLD_AUTO
+                ? MatchStatus.MATCHED
+                : MatchStatus.SUGGESTED;
+
+        return new MatchResult(best.memberId(), best.fullName(), bestScore, classification, bestBreakdown);
+    }
+
+    private static Map<String, Integer> orderedBreakdown(int amount, int memberNo, int name, int iban) {
+        Map<String, Integer> m = new LinkedHashMap<>(4);
+        m.put("amount", amount);
+        m.put("memberNumber", memberNo);
+        m.put("name", name);
+        m.put("iban", iban);
+        return m;
+    }
+
+    private static boolean isAmountPlausible(int txnCents, int expectedCents) {
+        if (expectedCents <= 0) return false;
+        double deviation = Math.abs(txnCents - expectedCents) / (double) expectedCents;
+        return deviation <= AMOUNT_DEVIATION_THRESHOLD;
+    }
+
+    /** Returns 0–100. */
+    static int scoreAmount(int txnCents, int expectedCents) {
+        if (expectedCents <= 0) return 0;
+        if (txnCents == expectedCents) return 100;
+        double deviation = Math.abs(txnCents - expectedCents) / (double) expectedCents;
+        if (deviation <= AMOUNT_DEVIATION_THRESHOLD) return 50;
+        return 0;
+    }
+
+    /**
+     * Returns 0–100. Caller passes the normalized reference text
+     * (see {@link #normalize}); membership number is upper-cased internally.
+     */
+    static int scoreMemberNumber(String normalizedReference, String memberNumber) {
+        if (normalizedReference == null || normalizedReference.isEmpty()
+                || memberNumber == null || memberNumber.isEmpty()) {
+            return 0;
+        }
+        String mn = memberNumber.toUpperCase(java.util.Locale.ROOT).trim();
+        // Literal, case-insensitive match of the member number as stored (e.g. "M-0042")
+        if (normalizedReference.toUpperCase(java.util.Locale.ROOT).contains(mn)) return 100;
+        // Numeric-only fallback: avoid 1- or 2-digit false positives (would match years, amounts, etc.)
+        String numeric = mn.replaceAll("[^0-9]", "");
+        if (numeric.length() >= MIN_NUMERIC_MATCH_LENGTH
+                && normalizedReference.contains(numeric)) {
+            return 80;
+        }
+        return 0;
+    }
+
+    /** Helper that mirrors the scoring rule for the early-exit check. */
+    private static boolean containsMemberNumber(String normalizedReference, String memberNumber) {
+        return scoreMemberNumber(normalizedReference, memberNumber) > 0;
+    }
+
+    /**
+     * Returns 0–100. Compares the parsed counterparty name (preferred) and
+     * the free-text reference (fallback) against the member's full name.
+     */
+    static int scoreName(String normalizedReference, String normalizedCounterparty, String normalizedMemberName) {
+        if (normalizedMemberName == null || normalizedMemberName.isEmpty()) return 0;
+
+        String fromCounterparty = scoreNameAgainst(normalizedCounterparty, normalizedMemberName);
+        String fromReference = scoreNameAgainst(normalizedReference, normalizedMemberName);
+
+        int s1 = bucket(fromCounterparty);
+        int s2 = bucket(fromReference);
+        return Math.max(s1, s2);
+    }
+
+    /** Scores one haystack symbolically; the name may contain the haystack only if it has ≥ 3 chars (no "-" hits). */
+    private static String scoreNameAgainst(String haystack, String needleFullName) {
+        if (haystack == null || haystack.isEmpty()) return "none";
+        if (haystack.equals(needleFullName)) return "exact";
+        if (haystack.contains(needleFullName) || (haystack.length() >= 3 && needleFullName.contains(haystack))) return "contains";
+        // Last-name only
+        int sp = needleFullName.lastIndexOf(' ');
+        if (sp >= 0 && sp < needleFullName.length() - 1) {
+            String lastName = needleFullName.substring(sp + 1);
+            if (lastName.length() >= 3 && haystack.contains(lastName)) return "lastname";
+        }
+        return "none";
+    }
+
+    private static int bucket(String tag) {
+        return switch (tag) {
+            case "exact" -> 100;
+            case "contains" -> 80;
+            case "lastname" -> 50;
+            default -> 0;
+        };
+    }
+
+    /** Returns 0 or 100 — IBAN match is binary. */
+    static int scoreIban(String txnIbanNorm, String memberIbanNorm) {
+        if (txnIbanNorm == null || txnIbanNorm.isEmpty()
+                || memberIbanNorm == null || memberIbanNorm.isEmpty()) {
+            return 0;
+        }
+        return txnIbanNorm.equalsIgnoreCase(memberIbanNorm) ? 100 : 0;
+    }
+
+    // ------------------------------------------------------------------
+    // Internal — normalisation helpers
+    // ------------------------------------------------------------------
+
+    /**
+     * Normalises German payment text for comparison: locale-independent
+     * lower-case, replace umlauts (ä → ae, ö → oe, ü → ue, ß → ss),
+     * collapse whitespace. Returns the empty string for {@code null}/blank input.
+     */
+    static String normalize(String s) {
+        if (s == null) return "";
+        String lower = s.toLowerCase(java.util.Locale.ROOT);
+        StringBuilder sb = new StringBuilder(lower.length() + 4);
+        for (int i = 0; i < lower.length(); i++) {
+            char c = lower.charAt(i);
+            switch (c) {
+                case 'ä' -> sb.append("ae");
+                case 'ö' -> sb.append("oe");
+                case 'ü' -> sb.append("ue");
+                case 'ß' -> sb.append("ss");
+                default -> sb.append(c);
+            }
+        }
+        // Collapse runs of whitespace
+        return sb.toString().replaceAll("\\s+", " ").trim();
+    }
+
+    /** Strips all whitespace and upper-cases (locale-independent) — never returns {@code null}. */
+    static String normalizeIban(String iban) {
+        if (iban == null) return "";
+        return iban.replaceAll("\\s", "").toUpperCase(java.util.Locale.ROOT);
+    }
+
+    private static String nullToEmpty(String s) {
+        return s == null ? "" : s;
+    }
+
+    // ------------------------------------------------------------------
+    // Internal — ParsedTransaction → BankTransaction mapping
+    // ------------------------------------------------------------------
+
+    private static BankTransaction toEntity(ParsedTransaction p, UUID sessionId, UUID clubId) {
+        BankTransaction tx = new BankTransaction();
+        tx.setSessionId(sessionId);
+        tx.setClubId(clubId);
+        tx.setBookingDate(p.bookingDate());
+        tx.setValueDate(p.valueDate());
+        tx.setAmountCents(p.amountCents());
+        tx.setCurrency(p.currency() != null ? p.currency() : "EUR");
+        tx.setReferenceText(p.referenceText());
+        tx.setCounterpartyName(p.counterpartyName());
+        tx.setCounterpartyIban(p.counterpartyIban());
+        tx.setBankReference(p.bankReference());
+        tx.setMatchStatus(MatchStatus.UNMATCHED);
+        return tx;
+    }
+
+    private static void applyMatchResult(BankTransaction tx, MatchResult mr) {
+        tx.setMatchStatus(mr.classification());
+        if (mr.classification() != MatchStatus.UNMATCHED) {
+            tx.setMatchConfidence(mr.confidence());
+            tx.setMatchedMemberId(mr.matchedMemberId());
+        }
+    }
+
+    // Suppress unused-import warning for FeeInterval (kept for future per-interval scoring)
+    @SuppressWarnings("unused")
+    private static FeeInterval unusedAnchor() { return null; }
+}