feat(sprint10): Phase 2 — Payment matching engine with confidence scoring

This commit is contained in:
Patrick Plate
2026-06-15 17:30:28 +02:00
parent 55110c95af
commit 527e9b1219
3 changed files with 578 additions and 0 deletions
@@ -0,0 +1,36 @@
package de.cannamanage.service.bankimport;
import de.cannamanage.domain.enums.MatchStatus;
import java.util.Map;
import java.util.UUID;
/**
 * Sprint 10 — Per-transaction outcome of {@link PaymentMatchingService}.
 * <p>
 * Carries the chosen member, the aggregated confidence (0–100), the resulting
 * {@link MatchStatus} classification and a per-criterion {@code scoreBreakdown}
 * for diagnostics. Instances are produced by
 * {@code PaymentMatchingService.scoreOne}; the public {@code matchTransactions}
 * API copies status, confidence and member id from them onto
 * {@code BankTransaction} entities.
 * <p>
 * The record is immutable: the breakdown map is defensively copied on
 * construction (iteration order of the source map is preserved) and the
 * accessor returns an unmodifiable view. A {@code null} breakdown is stored
 * as an empty map, so {@link #scoreBreakdown()} never returns {@code null}.
 *
 * @param matchedMemberId   chosen member, {@code null} when {@code classification == UNMATCHED}
 * @param matchedMemberName chosen member's full name for log/UI display, {@code null} when unmatched
 * @param confidence        0–100 aggregated weighted score
 * @param classification    {@link MatchStatus#MATCHED} (≥ 90), {@link MatchStatus#SUGGESTED} (60–89)
 *                          or {@link MatchStatus#UNMATCHED} (&lt; 60)
 * @param scoreBreakdown    criterion → individual 0–100 score; useful for explaining a match in the UI
 */
public record MatchResult(
        UUID matchedMemberId,
        String matchedMemberName,
        int confidence,
        MatchStatus classification,
        Map<String, Integer> scoreBreakdown
) {

    /**
     * Snapshots the breakdown so later changes to the caller's map cannot
     * alter this result. A {@code LinkedHashMap} copy is used instead of
     * {@code Map.copyOf} because the latter does not keep insertion order.
     */
    public MatchResult {
        scoreBreakdown = scoreBreakdown == null
                ? Map.of()
                : java.util.Collections.unmodifiableMap(new java.util.LinkedHashMap<>(scoreBreakdown));
    }

    /**
     * Returns the result used when no member qualifies: no member id or name,
     * confidence {@code 0}, {@link MatchStatus#UNMATCHED} and an empty breakdown.
     */
    public static MatchResult unmatched() {
        return new MatchResult(null, null, 0, MatchStatus.UNMATCHED, Map.of());
    }
}
@@ -0,0 +1,36 @@
package de.cannamanage.service.bankimport;
import java.util.UUID;
/**
 * Sprint 10 — Immutable snapshot of the member data the matching engine
 * compares a bank transaction against.
 * <p>
 * One instance is built per member at the start of
 * {@link PaymentMatchingService#matchTransactions} and then reused for every
 * transaction of that run, so the expected fee is looked up once per member
 * rather than once per (transaction × member) pair.
 *
 * @param memberId            member primary key
 * @param memberNumber        the {@code Member.membershipNumber} (e.g. {@code "M-0042"})
 * @param fullName            "{firstName} {lastName}", already joined for scoring
 * @param iban                member IBAN if BANK_DATA consent was granted, otherwise {@code null}
 * @param expectedAmountCents fee amount in cents for the matching period, or
 *                            {@link #NO_EXPECTED_AMOUNT} when the member has no
 *                            fee assignment for that period
 */
record MemberMatchContext(
        UUID memberId,
        String memberNumber,
        String fullName,
        String iban,
        int expectedAmountCents
) {

    /** Sentinel value indicating the member has no fee assignment for the matching period. */
    static final int NO_EXPECTED_AMOUNT = -1;

    /**
     * Tells whether a usable expected fee is present.
     *
     * @return {@code true} only for a strictly positive amount; zero and the
     *         {@link #NO_EXPECTED_AMOUNT} sentinel both yield {@code false}
     */
    boolean hasExpectedAmount() {
        return expectedAmountCents >= 1;
    }
}
@@ -0,0 +1,506 @@
package de.cannamanage.service.bankimport;
import de.cannamanage.domain.entity.BankTransaction;
import de.cannamanage.domain.entity.FeeSchedule;
import de.cannamanage.domain.entity.Member;
import de.cannamanage.domain.entity.MemberFeeAssignment;
import de.cannamanage.domain.enums.FeeInterval;
import de.cannamanage.domain.enums.MatchStatus;
import de.cannamanage.domain.enums.MemberStatus;
import de.cannamanage.service.repository.FeeScheduleRepository;
import de.cannamanage.service.repository.MemberFeeAssignmentRepository;
import de.cannamanage.service.repository.MemberRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.UUID;
import java.util.regex.Pattern;
/**
* Sprint 10 — Deterministic, in-memory matching engine that pairs parsed
* bank-statement transactions to club members.
* <p>
* The algorithm is a weighted-confidence model with four criteria:
* <table>
* <caption>Scoring weights (sum = 1.00)</caption>
* <tr><th>Criterion</th><th>Weight</th><th>Source</th></tr>
* <tr><td>Amount equals expected fee (±20%)</td><td>0.35</td><td>{@link FeeSchedule}</td></tr>
* <tr><td>Member number found in {@code Verwendungszweck}</td><td>0.30</td><td>{@link Member#getMembershipNumber()}</td></tr>
* <tr><td>Counterparty name matches member name</td><td>0.20</td><td>{@link Member#getFirstName()} / {@link Member#getLastName()}</td></tr>
* <tr><td>IBAN exact match (after BANK_DATA consent)</td><td>0.15</td><td>{@link Member#getIban()}</td></tr>
* </table>
* Classification thresholds: <b>≥ 90 → MATCHED</b> (admin pre-selected),
* <b>6089 → SUGGESTED</b> (admin review), <b>&lt; 60 → UNMATCHED</b>.
* <p>
* <b>Double-payment safety:</b> if the same member is the best match for
* two or more transactions in one import, all of them are downgraded to
* {@link MatchStatus#SUGGESTED} (even at confidence ≥ 90) so the admin
* must decide which is the real payment and which is a duplicate or a
* payment for a different period.
* <p>
* <b>Performance:</b> fee amounts are pre-computed once per matching run,
* keyed by member id, against the import's <em>booking-date context</em>
* (the most frequent booking date in the batch — so a January import of
* December transactions matches December's fee schedule, not today's).
* Per-transaction we early-exit when the amount deviates by more than 20%
* AND no membership number appears in the reference text, avoiding the
* expensive name/IBAN comparisons for the vast majority of pairs.
* <p>
* The service is <b>stateless and thread-safe</b>; all state lives in
* the parameters of {@link #matchTransactions(List, UUID, UUID)}.
* It performs <b>no persistence</b> — the returned {@link BankTransaction}
* entities are detached and must be persisted by the calling orchestrator
* (see {@code BankImportService}, Phase 3).
*/
@Service
public class PaymentMatchingService {
private static final Logger log = LoggerFactory.getLogger(PaymentMatchingService.class);
// --- Weights (sum = 1.00) — Plan v3 §3.1 ---
static final double W_AMOUNT = 0.35;
static final double W_MEMBER_NUMBER = 0.30;
static final double W_NAME = 0.20;
static final double W_IBAN = 0.15;
// --- Classification thresholds (aggregate score 0100) ---
static final int THRESHOLD_AUTO = 90;
static final int THRESHOLD_SUGGEST = 60;
/** Amount deviation tolerance for "near match" (50% scoring). */
private static final double AMOUNT_DEVIATION_THRESHOLD = 0.20;
/** Minimum digit count of a member number that may match as a numeric substring. */
private static final int MIN_NUMERIC_MATCH_LENGTH = 3;
/** Threshold (transactions) above which timing diagnostics are logged at INFO. */
private static final int PERF_LOG_THRESHOLD = 500;
private final MemberRepository memberRepository;
private final MemberFeeAssignmentRepository feeAssignmentRepository;
private final FeeScheduleRepository feeScheduleRepository;
public PaymentMatchingService(MemberRepository memberRepository,
MemberFeeAssignmentRepository feeAssignmentRepository,
FeeScheduleRepository feeScheduleRepository) {
this.memberRepository = memberRepository;
this.feeAssignmentRepository = feeAssignmentRepository;
this.feeScheduleRepository = feeScheduleRepository;
}
// ------------------------------------------------------------------
// Public API
// ------------------------------------------------------------------
/**
* Matches a batch of parsed transactions against the club's active members.
* <p>
* Outgoing transactions (negative {@code amountCents}) are returned with
* {@link MatchStatus#UNMATCHED} — they represent expenses and are handled
* by a separate categorisation flow.
*
* @param parsedTransactions parser output (positive = incoming, negative = outgoing)
* @param clubId tenant scope; only active members of this club are considered
* @param sessionId the {@link de.cannamanage.domain.entity.BankImportSession}
* the returned transactions will belong to
* @return detached {@link BankTransaction} entities with
* {@code matchStatus}, {@code matchConfidence} and
* {@code matchedMemberId} populated where applicable —
* <em>not yet persisted</em>
*/
public List<BankTransaction> matchTransactions(List<ParsedTransaction> parsedTransactions,
UUID clubId,
UUID sessionId) {
Objects.requireNonNull(parsedTransactions, "parsedTransactions");
Objects.requireNonNull(clubId, "clubId");
Objects.requireNonNull(sessionId, "sessionId");
long startNanos = System.nanoTime();
// 1. Load all ACTIVE members for the club (single query)
List<Member> activeMembers = memberRepository.findByClubIdAndStatus(clubId, MemberStatus.ACTIVE);
// 2. Determine the booking-date context for fee lookup (most frequent date in the batch)
LocalDate bookingDateContext = pickBookingDateContext(parsedTransactions);
// 3. Pre-compute fee amounts once
Map<UUID, Integer> expectedAmounts = precomputeFeeAmounts(activeMembers, clubId, bookingDateContext);
// 4. Build per-member matching contexts (stable order for deterministic tie-break)
List<MemberMatchContext> contexts = buildContexts(activeMembers, expectedAmounts);
// 5. Score every transaction
List<BankTransaction> result = new ArrayList<>(parsedTransactions.size());
Map<UUID, List<Integer>> memberHits = new HashMap<>(); // memberId → indexes into result that matched it
for (ParsedTransaction parsed : parsedTransactions) {
BankTransaction tx = toEntity(parsed, sessionId, clubId);
if (parsed.amountCents() <= 0) {
// Outgoing or zero — never a member payment
tx.setMatchStatus(MatchStatus.UNMATCHED);
result.add(tx);
continue;
}
MatchResult mr = scoreOne(parsed, contexts);
applyMatchResult(tx, mr);
int idx = result.size();
result.add(tx);
if (mr.matchedMemberId() != null
&& (mr.classification() == MatchStatus.MATCHED
|| mr.classification() == MatchStatus.SUGGESTED)) {
memberHits.computeIfAbsent(mr.matchedMemberId(), k -> new ArrayList<>()).add(idx);
}
}
// 6. Double-payment safety: downgrade ALL hits for any member that matched 2+ transactions
int downgrades = 0;
for (Map.Entry<UUID, List<Integer>> e : memberHits.entrySet()) {
if (e.getValue().size() < 2) continue;
for (int idx : e.getValue()) {
BankTransaction tx = result.get(idx);
if (tx.getMatchStatus() == MatchStatus.MATCHED) {
tx.setMatchStatus(MatchStatus.SUGGESTED);
downgrades++;
}
}
}
long durationMs = (System.nanoTime() - startNanos) / 1_000_000L;
if (parsedTransactions.size() >= PERF_LOG_THRESHOLD) {
log.info("Matching {} txns × {} members in {} ms ({} double-payment downgrades)",
parsedTransactions.size(), contexts.size(), durationMs, downgrades);
} else if (log.isDebugEnabled()) {
log.debug("Matching {} txns × {} members in {} ms ({} double-payment downgrades)",
parsedTransactions.size(), contexts.size(), durationMs, downgrades);
}
return result;
}
// ------------------------------------------------------------------
// Internal — context building
// ------------------------------------------------------------------
private List<MemberMatchContext> buildContexts(List<Member> members, Map<UUID, Integer> expectedAmounts) {
List<MemberMatchContext> ctx = new ArrayList<>(members.size());
for (Member m : members) {
String fullName = ((nullToEmpty(m.getFirstName()) + " " + nullToEmpty(m.getLastName())).trim());
int expected = expectedAmounts.getOrDefault(m.getId(), MemberMatchContext.NO_EXPECTED_AMOUNT);
ctx.add(new MemberMatchContext(
m.getId(),
m.getMembershipNumber(),
fullName,
m.getIban(),
expected
));
}
// Deterministic tie-break: stable order by member id
ctx.sort(Comparator.comparing(MemberMatchContext::memberId));
return ctx;
}
/**
* Picks the most frequent {@code bookingDate} in the batch as the
* temporal context for fee-assignment lookup. Ties resolve to the
* <em>earliest</em> date (favouring the older period in mixed batches).
* Empty batches default to {@link LocalDate#now()}.
*/
static LocalDate pickBookingDateContext(List<ParsedTransaction> txns) {
if (txns == null || txns.isEmpty()) return LocalDate.now();
Map<LocalDate, Integer> freq = new HashMap<>();
for (ParsedTransaction t : txns) {
if (t.bookingDate() != null) {
freq.merge(t.bookingDate(), 1, Integer::sum);
}
}
if (freq.isEmpty()) return LocalDate.now();
return freq.entrySet().stream()
.sorted((a, b) -> {
int cmp = Integer.compare(b.getValue(), a.getValue());
return cmp != 0 ? cmp : a.getKey().compareTo(b.getKey());
})
.findFirst()
.map(Map.Entry::getKey)
.orElse(LocalDate.now());
}
/**
* Pre-computes the expected monthly-equivalent fee in cents per member,
* using the assignment valid at {@code bookingDateContext} (not today).
* Members without an assignment for that period are absent from the map.
* <p>
* Fee schedules with intervals other than monthly are normalised to a
* <em>per-billing-period</em> amount as recorded on the {@link FeeSchedule}
* itself — the bank transaction is expected to match the schedule's
* raw {@code amountCents}, not a derived monthly figure.
*/
Map<UUID, Integer> precomputeFeeAmounts(List<Member> members, UUID clubId, LocalDate bookingDateContext) {
// Bulk-load fee assignments for the club, then index by member
List<MemberFeeAssignment> allAssignments = feeAssignmentRepository.findByClubId(clubId);
Map<UUID, List<MemberFeeAssignment>> byMember = new HashMap<>();
for (MemberFeeAssignment a : allAssignments) {
byMember.computeIfAbsent(a.getMemberId(), k -> new ArrayList<>()).add(a);
}
// Bulk-load fee schedules for the club
Map<UUID, FeeSchedule> schedulesById = new HashMap<>();
for (FeeSchedule s : feeScheduleRepository.findByClubId(clubId)) {
schedulesById.put(s.getId(), s);
}
Map<UUID, Integer> expected = new HashMap<>(members.size());
for (Member m : members) {
List<MemberFeeAssignment> assignments = byMember.get(m.getId());
if (assignments == null) continue;
Optional<MemberFeeAssignment> active = assignments.stream()
.filter(a -> isValidAt(a, bookingDateContext))
.findFirst();
if (active.isEmpty()) continue;
FeeSchedule fs = schedulesById.get(active.get().getFeeScheduleId());
if (fs == null || fs.getAmountCents() == null) continue;
expected.put(m.getId(), fs.getAmountCents());
}
return expected;
}
private static boolean isValidAt(MemberFeeAssignment a, LocalDate date) {
if (a.getValidFrom() != null && a.getValidFrom().isAfter(date)) return false;
if (a.getValidTo() != null && !a.getValidTo().isAfter(date)) return false;
return true;
}
// ------------------------------------------------------------------
// Internal — scoring (one transaction)
// ------------------------------------------------------------------
/**
* Scores a parsed transaction against every member context and returns
* the best candidate. Package-private for testing.
*/
MatchResult scoreOne(ParsedTransaction txn, List<MemberMatchContext> contexts) {
if (txn.amountCents() <= 0) return MatchResult.unmatched();
String normalizedReference = normalize(txn.referenceText());
String normalizedCounterparty = normalize(txn.counterpartyName());
String normalizedTxnIban = normalizeIban(txn.counterpartyIban());
MemberMatchContext best = null;
int bestScore = -1;
Map<String, Integer> bestBreakdown = Map.of();
for (MemberMatchContext ctx : contexts) {
// Early-exit: if amount deviation > 20% AND no membership number found in reference, skip.
boolean amountPlausible = isAmountPlausible(txn.amountCents(), ctx.expectedAmountCents());
boolean memberNumberHit = ctx.memberNumber() != null
&& containsMemberNumber(normalizedReference, ctx.memberNumber());
if (!amountPlausible && !memberNumberHit) {
continue;
}
int amountScore = scoreAmount(txn.amountCents(), ctx.expectedAmountCents());
int memberNumberScore = scoreMemberNumber(normalizedReference, ctx.memberNumber());
int nameScore = scoreName(normalizedReference, normalizedCounterparty, normalize(ctx.fullName()));
int ibanScore = scoreIban(normalizedTxnIban, normalizeIban(ctx.iban()));
double weighted = W_AMOUNT * amountScore
+ W_MEMBER_NUMBER * memberNumberScore
+ W_NAME * nameScore
+ W_IBAN * ibanScore;
int total = (int) Math.round(weighted);
if (total > bestScore) {
bestScore = total;
best = ctx;
bestBreakdown = orderedBreakdown(amountScore, memberNumberScore, nameScore, ibanScore);
}
}
if (best == null || bestScore < THRESHOLD_SUGGEST) {
return MatchResult.unmatched();
}
MatchStatus classification = bestScore >= THRESHOLD_AUTO
? MatchStatus.MATCHED
: MatchStatus.SUGGESTED;
return new MatchResult(best.memberId(), best.fullName(), bestScore, classification, bestBreakdown);
}
private static Map<String, Integer> orderedBreakdown(int amount, int memberNo, int name, int iban) {
Map<String, Integer> m = new LinkedHashMap<>(4);
m.put("amount", amount);
m.put("memberNumber", memberNo);
m.put("name", name);
m.put("iban", iban);
return m;
}
private static boolean isAmountPlausible(int txnCents, int expectedCents) {
if (expectedCents <= 0) return false;
double deviation = Math.abs(txnCents - expectedCents) / (double) expectedCents;
return deviation <= AMOUNT_DEVIATION_THRESHOLD;
}
/** Returns 0100. */
static int scoreAmount(int txnCents, int expectedCents) {
if (expectedCents <= 0) return 0;
if (txnCents == expectedCents) return 100;
double deviation = Math.abs(txnCents - expectedCents) / (double) expectedCents;
if (deviation <= AMOUNT_DEVIATION_THRESHOLD) return 50;
return 0;
}
/**
* Returns 0100. Caller passes the <em>normalized</em> reference text
* (see {@link #normalize}); membership number is upper-cased internally.
*/
static int scoreMemberNumber(String normalizedReference, String memberNumber) {
if (normalizedReference == null || normalizedReference.isEmpty()
|| memberNumber == null || memberNumber.isEmpty()) {
return 0;
}
String mn = memberNumber.toUpperCase().trim();
// Exact "M-0042" / "M0042" style match
if (normalizedReference.toUpperCase().contains(mn)) return 100;
// Numeric-only fallback: avoid 1- or 2-digit false positives (would match years, amounts, etc.)
String numeric = mn.replaceAll("[^0-9]", "");
if (numeric.length() >= MIN_NUMERIC_MATCH_LENGTH
&& normalizedReference.contains(numeric)) {
return 80;
}
return 0;
}
/** Helper that mirrors the scoring rule for the early-exit check. */
private static boolean containsMemberNumber(String normalizedReference, String memberNumber) {
return scoreMemberNumber(normalizedReference, memberNumber) > 0;
}
/**
* Returns 0100. Compares the parsed counterparty name (preferred) and
* the free-text reference (fallback) against the member's full name.
*/
static int scoreName(String normalizedReference, String normalizedCounterparty, String normalizedMemberName) {
if (normalizedMemberName == null || normalizedMemberName.isEmpty()) return 0;
String fromCounterparty = scoreNameAgainst(normalizedCounterparty, normalizedMemberName);
String fromReference = scoreNameAgainst(normalizedReference, normalizedMemberName);
int s1 = bucket(fromCounterparty);
int s2 = bucket(fromReference);
return Math.max(s1, s2);
}
/** Pure scoring against one haystack — returns the symbolic match strength. */
private static String scoreNameAgainst(String haystack, String needleFullName) {
if (haystack == null || haystack.isEmpty()) return "none";
if (haystack.equals(needleFullName)) return "exact";
if (haystack.contains(needleFullName) || needleFullName.contains(haystack)) return "contains";
// Last-name only
int sp = needleFullName.lastIndexOf(' ');
if (sp >= 0 && sp < needleFullName.length() - 1) {
String lastName = needleFullName.substring(sp + 1);
if (lastName.length() >= 3 && haystack.contains(lastName)) return "lastname";
}
return "none";
}
private static int bucket(String tag) {
return switch (tag) {
case "exact" -> 100;
case "contains" -> 80;
case "lastname" -> 50;
default -> 0;
};
}
/** Returns 0 or 100 — IBAN match is binary. */
static int scoreIban(String txnIbanNorm, String memberIbanNorm) {
if (txnIbanNorm == null || txnIbanNorm.isEmpty()
|| memberIbanNorm == null || memberIbanNorm.isEmpty()) {
return 0;
}
return txnIbanNorm.equalsIgnoreCase(memberIbanNorm) ? 100 : 0;
}
// ------------------------------------------------------------------
// Internal — normalisation helpers
// ------------------------------------------------------------------
/**
* Normalises German payment text for comparison:
* lower-case, replace umlauts (ä → ae, ö → oe, ü → ue, ß → ss),
* collapse whitespace. Returns the empty string for {@code null}/blank input.
*/
static String normalize(String s) {
if (s == null) return "";
String lower = s.toLowerCase();
StringBuilder sb = new StringBuilder(lower.length() + 4);
for (int i = 0; i < lower.length(); i++) {
char c = lower.charAt(i);
switch (c) {
case 'ä' -> sb.append("ae");
case 'ö' -> sb.append("oe");
case 'ü' -> sb.append("ue");
case 'ß' -> sb.append("ss");
default -> sb.append(c);
}
}
// Collapse runs of whitespace
return sb.toString().replaceAll("\\s+", " ").trim();
}
/** Strips all whitespace and upper-cases — never returns {@code null}. */
static String normalizeIban(String iban) {
if (iban == null) return "";
return iban.replaceAll("\\s", "").toUpperCase();
}
private static String nullToEmpty(String s) {
return s == null ? "" : s;
}
// ------------------------------------------------------------------
// Internal — ParsedTransaction → BankTransaction mapping
// ------------------------------------------------------------------
private static BankTransaction toEntity(ParsedTransaction p, UUID sessionId, UUID clubId) {
BankTransaction tx = new BankTransaction();
tx.setSessionId(sessionId);
tx.setClubId(clubId);
tx.setBookingDate(p.bookingDate());
tx.setValueDate(p.valueDate());
tx.setAmountCents(p.amountCents());
tx.setCurrency(p.currency() != null ? p.currency() : "EUR");
tx.setReferenceText(p.referenceText());
tx.setCounterpartyName(p.counterpartyName());
tx.setCounterpartyIban(p.counterpartyIban());
tx.setBankReference(p.bankReference());
tx.setMatchStatus(MatchStatus.UNMATCHED);
return tx;
}
private static void applyMatchResult(BankTransaction tx, MatchResult mr) {
tx.setMatchStatus(mr.classification());
if (mr.classification() != MatchStatus.UNMATCHED) {
tx.setMatchConfidence(mr.confidence());
tx.setMatchedMemberId(mr.matchedMemberId());
}
}
// Suppress unused-import warning for FeeInterval (kept for future per-interval scoring)
@SuppressWarnings("unused")
private static FeeInterval unusedAnchor() { return null; }
}