feat(sprint10): Phase 1 — Data model + bank statement parsers (MT940, CAMT.053, CSV)
Implements the Sprint 10 Phase 1 foundation for the Smart Payment Import feature:
Domain layer:
- 3 new enums: BankFormat (MT940, CAMT053, CSV), ImportSessionStatus, MatchStatus
- StaffPermission.FINANCE_IMPORT
- AuditEventType: BANK_IMPORT_STARTED/COMPLETED/FAILED + BANK_PAYMENT_CONFIRMED
- NotificationType.BANK_IMPORT_COMPLETED
- ConsentType.BANK_DATA (DSGVO consent for IBAN storage)
- 3 new entities: BankImportSession, BankTransaction, CsvColumnMapping
- Member: + iban (VARCHAR 34) + ibanConsentDate
- MemberStatus.LEFT (semantic alias for RESIGNED, referenced by Sprint 9 RetentionService)
Persistence:
- V30__bank_import_sessions.sql
- V31__bank_transactions.sql
- V32__csv_column_mappings.sql (also adds iban + iban_consent_date to members)
- 3 Spring Data repositories
Parser infrastructure (cannamanage-service/src/main/java/de/cannamanage/service/bankimport):
- BankStatementParser interface (Strategy pattern, Spring-injected list)
- ParsedTransaction + ParseResult records
- BankStatementParseException (parse errors)
- Mt940Parser: custom state machine, CENTURY_BOUNDARY=70 for YY→YYYY, proprietary
header tolerance (skips lines before first :20: for StarMoney/WISO/Hibiscus wrappers)
- Camt053Parser: StAX streaming with XXE hardening (IS_SUPPORTING_EXTERNAL_ENTITIES,
SUPPORT_DTD, IS_REPLACING_ENTITY_REFERENCES all false); supports camt.053.001.02
and camt.053.001.08 namespaces
- CsvBankParser: Apache Commons CSV with configurable columns per club; German number
format ("1.234,56"); ISO-8859-1 default encoding
- BankStatementParserService: filename-extension hint + content probe; throws
UnrecognizedFormatException when no parser claims the file
Build verified via Docker (cannamanage-api:sprint10-phase1).
Sprint 9 fix (incidental, required to compile):
- Added MemberStatus.LEFT (Sprint 9 RetentionService referenced it but the enum
value was missing)
- MemberListRegistryGenerator: added LEFT to formatStatus() switch (mapped to
"Ausgetreten", same as RESIGNED)
Sprint 10 docs: analysis, plan, plan-review, testplan.
Co-Authored-By: Lumen <lumen@cannamanage.de>
This commit is contained in:
+21
@@ -0,0 +1,21 @@
|
||||
package de.cannamanage.service.bankimport;
|
||||
|
||||
/**
 * Sprint 10 — Unrecoverable error while parsing a bank statement file.
 * <p>
 * Thrown by {@link BankStatementParser#parse} when the input cannot be processed
 * (malformed XML, unreadable encoding, missing required fields, etc.).
 * <p>
 * Recoverable issues (skipped lines, missing optional fields) are reported as
 * warnings on the {@link ParseResult} instead of throwing.
 */
public class BankStatementParseException extends RuntimeException {

    /**
     * Pinned serial version so the serialized form does not depend on the
     * compiler-generated default ({@link RuntimeException} is {@code Serializable}).
     */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that carries only a description of the failure.
     *
     * @param message human-readable description of what could not be parsed
     */
    public BankStatementParseException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the lower-level failure which aborted parsing.
     *
     * @param message human-readable description of what could not be parsed
     * @param cause   underlying exception, preserved for diagnostics
     */
    public BankStatementParseException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
||||
+59
@@ -0,0 +1,59 @@
|
||||
package de.cannamanage.service.bankimport;
|
||||
|
||||
import de.cannamanage.domain.entity.CsvColumnMapping;
|
||||
import de.cannamanage.domain.enums.BankFormat;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Sprint 10 — Strategy interface for parsing one bank statement format.
|
||||
* <p>
|
||||
* Implementations are Spring {@code @Component} beans, auto-discovered by
|
||||
* {@link BankStatementParserService}. Add a new format by adding a new
|
||||
* implementation — no further wiring needed.
|
||||
* <p>
|
||||
* Each parser must:
|
||||
* <ul>
|
||||
* <li>Declare the single format it handles via {@link #getSupportedFormat()}</li>
|
||||
* <li>Detect that format reliably from the first ~512 bytes of file content via
|
||||
* {@link #canParse(String, byte[])}</li>
|
||||
* <li>Stream-parse the full file via {@link #parse(InputStream, String, CsvColumnMapping)},
|
||||
* producing a {@link ParseResult} — must not load the entire file into memory
|
||||
* (large bank exports can exceed 50 MB)</li>
|
||||
* </ul>
|
||||
*/
|
||||
public interface BankStatementParser {
|
||||
|
||||
/** The single bank statement format this parser handles. */
|
||||
BankFormat getSupportedFormat();
|
||||
|
||||
/**
|
||||
* Probe whether this parser can handle the given file.
|
||||
* <p>
|
||||
* Implementations should inspect the filename extension and/or the first bytes
|
||||
* (typically the first 512). Must be fast and side-effect free — called for
|
||||
* every uploaded file by {@link BankStatementParserService#detectFormat}.
|
||||
*
|
||||
* @param filename original upload filename (after path sanitization)
|
||||
* @param headerBytes first bytes of the file content (at least 512 bytes if available)
|
||||
* @return {@code true} if this parser claims the format, {@code false} otherwise
|
||||
*/
|
||||
boolean canParse(String filename, byte[] headerBytes);
|
||||
|
||||
/**
|
||||
* Parse the full statement.
|
||||
* <p>
|
||||
* The {@code mapping} parameter is required for {@link BankFormat#CSV} and
|
||||
* ignored by structured formats (MT940, CAMT.053). Callers must pass a non-null
|
||||
* mapping when the detected format is CSV.
|
||||
* <p>
|
||||
* The input stream is consumed but not closed — callers own the stream lifecycle.
|
||||
*
|
||||
* @param inputStream stream over the file content (caller closes)
|
||||
* @param filename original filename, used only for warning/error messages
|
||||
* @param mapping CSV column mapping (required for CSV, may be {@code null} otherwise)
|
||||
* @return parsed result; never {@code null}
|
||||
* @throws BankStatementParseException on unrecoverable parse errors
|
||||
*/
|
||||
ParseResult parse(InputStream inputStream, String filename, CsvColumnMapping mapping);
|
||||
}
|
||||
+234
@@ -0,0 +1,234 @@
|
||||
package de.cannamanage.service.bankimport;
|
||||
|
||||
import de.cannamanage.domain.entity.CsvColumnMapping;
|
||||
import de.cannamanage.domain.enums.BankFormat;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.SequenceInputStream;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Sprint 10 — Façade that detects a bank statement's format and routes parsing
|
||||
* to the matching {@link BankStatementParser}.
|
||||
* <p>
|
||||
* Spring auto-discovers all {@link BankStatementParser} beans and injects them
|
||||
* here as a {@code List}, so adding a new format only requires adding a new
|
||||
* parser bean. The service indexes them once at construction time keyed by
|
||||
* {@link BankFormat} for O(1) dispatch.
|
||||
* <p>
|
||||
* Format detection is a two-step probe:
|
||||
* <ol>
|
||||
* <li>Filename-extension hint ({@code .xml} → CAMT, {@code .sta}/{@code .mt940}/{@code .swift}
|
||||
* → MT940, {@code .csv}/{@code .txt} → CSV). The hint biases probe order
|
||||
* but never overrides content.</li>
|
||||
* <li>Content probe via {@link BankStatementParser#canParse(String, byte[])}
|
||||
* against the first {@value #HEADER_PROBE_BYTES} bytes. The first parser
|
||||
* that claims the content wins.</li>
|
||||
* </ol>
|
||||
* If no parser claims the file an {@link UnrecognizedFormatException} is thrown.
|
||||
*/
|
||||
@Service
|
||||
public class BankStatementParserService {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(BankStatementParserService.class);
|
||||
|
||||
/** Bytes inspected for content-based format detection. */
|
||||
public static final int HEADER_PROBE_BYTES = 512;
|
||||
|
||||
private final Map<BankFormat, BankStatementParser> parsersByFormat;
|
||||
private final List<BankStatementParser> parsers;
|
||||
|
||||
public BankStatementParserService(List<BankStatementParser> parsers) {
|
||||
Objects.requireNonNull(parsers, "parsers");
|
||||
this.parsers = List.copyOf(parsers);
|
||||
Map<BankFormat, BankStatementParser> index = new EnumMap<>(BankFormat.class);
|
||||
for (BankStatementParser parser : parsers) {
|
||||
BankFormat format = parser.getSupportedFormat();
|
||||
BankStatementParser existing = index.put(format, parser);
|
||||
if (existing != null) {
|
||||
throw new IllegalStateException(
|
||||
"Duplicate BankStatementParser beans for format " + format
|
||||
+ ": " + existing.getClass().getName()
|
||||
+ " and " + parser.getClass().getName());
|
||||
}
|
||||
}
|
||||
this.parsersByFormat = Map.copyOf(index);
|
||||
log.info("BankStatementParserService initialised with {} parsers: {}",
|
||||
parsersByFormat.size(), parsersByFormat.keySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect the {@link BankFormat} of a file based on filename and a content probe.
|
||||
* <p>
|
||||
* The {@code content} array may be the full file content or just a head slice —
|
||||
* only the first {@value #HEADER_PROBE_BYTES} bytes are inspected.
|
||||
*
|
||||
* @param filename original upload filename (used for extension hint and error messages)
|
||||
* @param content file content (or at least a head slice); must not be {@code null}
|
||||
* @return the detected format
|
||||
* @throws UnrecognizedFormatException if no registered parser claims the file
|
||||
*/
|
||||
public BankFormat detectFormat(String filename, byte[] content) {
|
||||
Objects.requireNonNull(content, "content");
|
||||
String safeName = filename == null ? "" : filename;
|
||||
byte[] header = sliceHeader(content);
|
||||
|
||||
// 1. Extension hint — try the hinted parser first, but never trust extension alone.
|
||||
BankFormat hint = extensionHint(safeName);
|
||||
if (hint != null) {
|
||||
BankStatementParser hinted = parsersByFormat.get(hint);
|
||||
if (hinted != null && hinted.canParse(safeName, header)) {
|
||||
log.debug("Format detected via extension hint: {} → {}", safeName, hint);
|
||||
return hint;
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Content probe — ask every parser, hinted one already tried.
|
||||
for (BankStatementParser parser : parsers) {
|
||||
BankFormat format = parser.getSupportedFormat();
|
||||
if (format == hint) {
|
||||
continue; // already tried
|
||||
}
|
||||
if (parser.canParse(safeName, header)) {
|
||||
log.debug("Format detected via content probe: {} → {}", safeName, format);
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
throw new UnrecognizedFormatException(
|
||||
"Bank statement format could not be detected for file: " + safeName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a bank statement using the parser registered for the given format.
|
||||
* <p>
|
||||
* The {@code csvMapping} argument is required when {@code format} is
|
||||
* {@link BankFormat#CSV} and ignored otherwise. The input stream is consumed
|
||||
* but not closed — the caller owns the stream lifecycle.
|
||||
*
|
||||
* @param input file content stream (caller closes)
|
||||
* @param filename original filename for warnings and error messages
|
||||
* @param format pre-detected format (typically the result of {@link #detectFormat})
|
||||
* @param csvMapping CSV column mapping, required iff {@code format == CSV}
|
||||
* @return parse result; never {@code null}
|
||||
* @throws UnrecognizedFormatException if no parser is registered for the format
|
||||
* @throws BankStatementParseException on parser-level errors
|
||||
* @throws IllegalArgumentException if {@code csvMapping} is {@code null} for CSV
|
||||
*/
|
||||
public ParseResult parse(InputStream input, String filename, BankFormat format, CsvColumnMapping csvMapping) {
|
||||
Objects.requireNonNull(input, "input");
|
||||
Objects.requireNonNull(format, "format");
|
||||
if (format == BankFormat.CSV && csvMapping == null) {
|
||||
throw new IllegalArgumentException("csvMapping is required for CSV format");
|
||||
}
|
||||
BankStatementParser parser = parsersByFormat.get(format);
|
||||
if (parser == null) {
|
||||
throw new UnrecognizedFormatException(
|
||||
"No parser registered for format " + format
|
||||
+ " (available: " + parsersByFormat.keySet() + ")");
|
||||
}
|
||||
log.debug("Parsing {} with {} ({} bytes mapping={})",
|
||||
filename, parser.getClass().getSimpleName(), format,
|
||||
csvMapping == null ? "n/a" : csvMapping.getId());
|
||||
return parser.parse(input, filename, csvMapping);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience: detect the format from the file content and parse it in one call.
|
||||
* <p>
|
||||
* The content is buffered in memory (callers must size accordingly) and replayed
|
||||
* through the matching parser. For very large files prefer the two-step API:
|
||||
* read a small header into memory, call {@link #detectFormat}, then call
|
||||
* {@link #parse(InputStream, String, BankFormat, CsvColumnMapping)} with the
|
||||
* full streaming source.
|
||||
*
|
||||
* @param content full file content
|
||||
* @param filename original filename
|
||||
* @param csvMapping CSV mapping (required iff CSV is detected)
|
||||
* @return parse result
|
||||
*/
|
||||
public ParseResult detectAndParse(byte[] content, String filename, CsvColumnMapping csvMapping) {
|
||||
Objects.requireNonNull(content, "content");
|
||||
BankFormat format = detectFormat(filename, content);
|
||||
try (InputStream in = new ByteArrayInputStream(content)) {
|
||||
return parse(in, filename, format, csvMapping);
|
||||
} catch (IOException e) {
|
||||
// ByteArrayInputStream#close is a no-op, but the contract still declares IOException.
|
||||
throw new BankStatementParseException("Failed to close in-memory stream for " + filename, e);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the immutable set of formats this service can dispatch to. */
|
||||
public java.util.Set<BankFormat> supportedFormats() {
|
||||
return parsersByFormat.keySet();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// helpers
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns at most {@value #HEADER_PROBE_BYTES} bytes from the start of {@code content}.
|
||||
* Never copies more than necessary.
|
||||
*/
|
||||
private static byte[] sliceHeader(byte[] content) {
|
||||
if (content.length <= HEADER_PROBE_BYTES) {
|
||||
return content;
|
||||
}
|
||||
byte[] head = new byte[HEADER_PROBE_BYTES];
|
||||
System.arraycopy(content, 0, head, 0, HEADER_PROBE_BYTES);
|
||||
return head;
|
||||
}
|
||||
|
||||
/**
|
||||
* Map a filename extension to a likely {@link BankFormat}.
|
||||
* Returns {@code null} when no hint can be derived.
|
||||
*/
|
||||
private static BankFormat extensionHint(String filename) {
|
||||
int dot = filename.lastIndexOf('.');
|
||||
if (dot < 0 || dot == filename.length() - 1) {
|
||||
return null;
|
||||
}
|
||||
String ext = filename.substring(dot + 1).toLowerCase(Locale.ROOT);
|
||||
return switch (ext) {
|
||||
case "xml", "camt", "053" -> BankFormat.CAMT053;
|
||||
case "mt940", "sta", "swift" -> BankFormat.MT940;
|
||||
case "csv", "txt", "tsv" -> BankFormat.CSV;
|
||||
default -> null;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps two streams so a previously-buffered header can be replayed in front
|
||||
* of a still-open file stream — handy when callers want to probe + parse a
|
||||
* single source without re-reading the disk. Currently unused by the public
|
||||
* API; kept as a building block for future controllers/services.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
static InputStream concat(byte[] head, InputStream rest) {
|
||||
return new SequenceInputStream(new ByteArrayInputStream(head), rest);
|
||||
}
|
||||
|
||||
/**
|
||||
* Thrown when no registered {@link BankStatementParser} claims a file.
|
||||
* Distinct from {@link BankStatementParseException} so callers can
|
||||
* surface "Format nicht erkannt" as a 400-style validation error rather
|
||||
* than a 500-style parse failure.
|
||||
*/
|
||||
public static class UnrecognizedFormatException extends RuntimeException {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
public UnrecognizedFormatException(String message) {
|
||||
super(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
+342
@@ -0,0 +1,342 @@
|
||||
package de.cannamanage.service.bankimport;
|
||||
|
||||
import de.cannamanage.domain.entity.CsvColumnMapping;
|
||||
import de.cannamanage.domain.enums.BankFormat;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
import java.io.InputStream;
|
||||
import java.math.BigDecimal;
|
||||
import java.math.RoundingMode;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Deque;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Sprint 10 — Parser for ISO 20022 CAMT.053 bank-to-customer statements.
|
||||
* <p>
|
||||
* CAMT.053 is the modern XML successor to MT940, mandated for SEPA transfers
|
||||
* and used by all German banks for online-banking exports since ~2014. It
|
||||
* exists in multiple schema versions (camt.053.001.02 through camt.053.001.10);
|
||||
* we handle them generically by ignoring the namespace and matching local names —
|
||||
* the elements we read have been stable across versions.
|
||||
* <p>
|
||||
* Document structure (simplified):
|
||||
* <pre>
|
||||
* Document
|
||||
* BkToCstmrStmt
|
||||
* Stmt ← one or more statements
|
||||
* Id, ElctrncSeqNb, CreDtTm, FrToDt
|
||||
* Acct/Id/IBAN ← account IBAN
|
||||
* Bal ← opening/closing balances
|
||||
* Ntry ← one entry per transaction
|
||||
* Amt ← amount with @Ccy attribute
|
||||
* CdtDbtInd ← CRDT or DBIT
|
||||
* BookgDt/Dt, ValDt/Dt
|
||||
* NtryRef ← bank reference
|
||||
* NtryDtls/TxDtls
|
||||
* RmtInf/Ustrd ← unstructured remittance ("Verwendungszweck")
|
||||
* RltdPties/Dbtr/Nm, /Cdtr/Nm
|
||||
* RltdPties/DbtrAcct/Id/IBAN, /CdtrAcct/Id/IBAN
|
||||
* </pre>
|
||||
* <p>
|
||||
* <strong>XXE hardening (Security advisory):</strong> The {@link XMLInputFactory} is
|
||||
* configured to disable DTDs, external entities, and entity reference expansion.
|
||||
* This makes the parser safe against XXE attacks (CWE-611) — bank statements arrive
|
||||
* from arbitrary external sources, so this is non-negotiable.
|
||||
* <p>
|
||||
* <strong>Streaming:</strong> StAX is used (not DOM/JAXB) to keep memory bounded —
|
||||
* large CAMT files from active accounts can exceed 50 MB with thousands of entries.
|
||||
*/
|
||||
@Component
|
||||
public class Camt053Parser implements BankStatementParser {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(Camt053Parser.class);
|
||||
|
||||
private static final DateTimeFormatter ISO_DATE = DateTimeFormatter.ISO_LOCAL_DATE;
|
||||
|
||||
private final XMLInputFactory xmlInputFactory;
|
||||
|
||||
public Camt053Parser() {
|
||||
this.xmlInputFactory = XMLInputFactory.newFactory();
|
||||
// Disable DTD support entirely — there is no legitimate DTD in CAMT.053.
|
||||
// This is the primary XXE defence: no DOCTYPE, no entities, no external resources.
|
||||
this.xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
|
||||
this.xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
|
||||
// Even with DTDs disabled, set these as belt-and-braces — some parser impls
|
||||
// honour them independently.
|
||||
try {
|
||||
this.xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
|
||||
} catch (IllegalArgumentException ignored) {
|
||||
// Not all StAX implementations expose this property.
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public BankFormat getSupportedFormat() {
|
||||
return BankFormat.CAMT053;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canParse(String filename, byte[] headerBytes) {
|
||||
if (headerBytes == null || headerBytes.length == 0) {
|
||||
return false;
|
||||
}
|
||||
String head = new String(headerBytes, StandardCharsets.UTF_8);
|
||||
// Must look like XML and contain either the camt.053 namespace marker
|
||||
// or the unmistakable BkToCstmrStmt root element. Either alone is sufficient.
|
||||
boolean looksLikeXml = head.contains("<?xml") || head.trim().startsWith("<");
|
||||
if (!looksLikeXml) return false;
|
||||
return head.contains("camt.053") || head.contains("BkToCstmrStmt");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseResult parse(InputStream inputStream, String filename, CsvColumnMapping mapping) {
|
||||
List<ParsedTransaction> transactions = new ArrayList<>();
|
||||
List<String> warnings = new ArrayList<>();
|
||||
|
||||
String accountIban = null;
|
||||
LocalDate statementDate = null;
|
||||
Integer openingBalanceCents = null;
|
||||
Integer closingBalanceCents = null;
|
||||
|
||||
// Path stack — used to disambiguate elements with the same local name in
|
||||
// different contexts (e.g. <Amt> on the entry vs. inside <TxDtls/InstdAmt>).
|
||||
Deque<String> path = new ArrayDeque<>();
|
||||
|
||||
XMLStreamReader reader = null;
|
||||
try {
|
||||
reader = xmlInputFactory.createXMLStreamReader(inputStream, "UTF-8");
|
||||
|
||||
// In-progress entry state. We commit on </Ntry>.
|
||||
EntryAccumulator current = null;
|
||||
// In-progress balance state. We commit on </Bal>.
|
||||
BalanceAccumulator currentBal = null;
|
||||
// Buffer for the most recent character data (only meaningful inside leaf elements).
|
||||
StringBuilder chars = new StringBuilder();
|
||||
String currentAmtCurrency = "EUR";
|
||||
|
||||
while (reader.hasNext()) {
|
||||
int event = reader.next();
|
||||
switch (event) {
|
||||
case XMLStreamConstants.START_ELEMENT -> {
|
||||
String local = reader.getLocalName();
|
||||
path.push(local);
|
||||
chars.setLength(0);
|
||||
|
||||
if ("Ntry".equals(local)) {
|
||||
current = new EntryAccumulator();
|
||||
} else if ("Bal".equals(local)) {
|
||||
currentBal = new BalanceAccumulator();
|
||||
} else if ("Amt".equals(local) && (insideBal(path) || directChildOf(path, "Ntry"))) {
|
||||
// Capture the @Ccy attribute on this Amt element only.
|
||||
currentAmtCurrency = reader.getAttributeValue(null, "Ccy");
|
||||
if (currentAmtCurrency == null) currentAmtCurrency = "EUR";
|
||||
}
|
||||
}
|
||||
case XMLStreamConstants.CHARACTERS, XMLStreamConstants.CDATA -> {
|
||||
chars.append(reader.getText());
|
||||
}
|
||||
case XMLStreamConstants.END_ELEMENT -> {
|
||||
String local = reader.getLocalName();
|
||||
String text = chars.toString().strip();
|
||||
|
||||
// --- Account IBAN (statement-level) ---
|
||||
if ("IBAN".equals(local) && pathContains(path, "Acct") && !pathContains(path, "RltdPties")) {
|
||||
if (accountIban == null) accountIban = text;
|
||||
}
|
||||
// --- Statement period (use the ToDtTm/Dt as statementDate fallback) ---
|
||||
else if (("ToDt".equals(local) || "Dt".equals(local))
|
||||
&& pathContains(path, "FrToDt") && statementDate == null) {
|
||||
statementDate = tryParseDate(text);
|
||||
}
|
||||
|
||||
// --- Balances ---
|
||||
if (currentBal != null) {
|
||||
if ("Cd".equals(local) && pathContains(path, "Tp") && pathContains(path, "Bal")) {
|
||||
currentBal.code = text; // OPBD/CLBD/PRCD/etc.
|
||||
} else if ("Amt".equals(local) && directChildOf(path, "Bal")) {
|
||||
currentBal.amountCents = parseAmountToCents(text);
|
||||
} else if ("CdtDbtInd".equals(local) && directChildOf(path, "Bal")) {
|
||||
currentBal.creditIndicator = "CRDT".equals(text);
|
||||
} else if ("Dt".equals(local) && pathContains(path, "Bal")) {
|
||||
currentBal.date = tryParseDate(text);
|
||||
} else if ("Bal".equals(local)) {
|
||||
// Commit
|
||||
Integer signedCents = currentBal.amountCents == null ? null
|
||||
: (currentBal.creditIndicator ? currentBal.amountCents : -currentBal.amountCents);
|
||||
if (signedCents != null && currentBal.code != null) {
|
||||
switch (currentBal.code) {
|
||||
case "OPBD", "PRCD" -> {
|
||||
if (openingBalanceCents == null) openingBalanceCents = signedCents;
|
||||
}
|
||||
case "CLBD" -> closingBalanceCents = signedCents;
|
||||
default -> { /* ITBD/CLAV/etc. — ignore */ }
|
||||
}
|
||||
if (statementDate == null && "CLBD".equals(currentBal.code)) {
|
||||
statementDate = currentBal.date;
|
||||
}
|
||||
}
|
||||
currentBal = null;
|
||||
}
|
||||
}
|
||||
|
||||
// --- Entry fields ---
|
||||
if (current != null) {
|
||||
if ("Amt".equals(local) && directChildOf(path, "Ntry")) {
|
||||
current.amountCents = parseAmountToCents(text);
|
||||
current.currency = currentAmtCurrency;
|
||||
} else if ("CdtDbtInd".equals(local) && directChildOf(path, "Ntry")) {
|
||||
current.isCredit = "CRDT".equals(text);
|
||||
} else if ("Dt".equals(local) && pathContains(path, "BookgDt")) {
|
||||
current.bookingDate = tryParseDate(text);
|
||||
} else if ("Dt".equals(local) && pathContains(path, "ValDt")) {
|
||||
current.valueDate = tryParseDate(text);
|
||||
} else if ("NtryRef".equals(local) && directChildOf(path, "Ntry")) {
|
||||
current.bankReference = text;
|
||||
} else if ("AcctSvcrRef".equals(local) && directChildOf(path, "Ntry")
|
||||
&& current.bankReference == null) {
|
||||
// Fallback when NtryRef is absent.
|
||||
current.bankReference = text;
|
||||
} else if ("Ustrd".equals(local) && pathContains(path, "RmtInf")) {
|
||||
// Multiple <Ustrd> elements are concatenated per ISO 20022.
|
||||
if (current.referenceText == null) {
|
||||
current.referenceText = text;
|
||||
} else {
|
||||
current.referenceText = current.referenceText + " " + text;
|
||||
}
|
||||
} else if ("Nm".equals(local)) {
|
||||
// Counterparty name: <Dbtr><Nm> for credits, <Cdtr><Nm> for debits.
|
||||
// We grab the name from the side that's NOT us.
|
||||
if (current.isCredit != null && current.isCredit && pathContains(path, "Dbtr")
|
||||
&& !pathContains(path, "DbtrAgt")) {
|
||||
current.counterpartyName = text;
|
||||
} else if (current.isCredit != null && !current.isCredit && pathContains(path, "Cdtr")
|
||||
&& !pathContains(path, "CdtrAgt")) {
|
||||
current.counterpartyName = text;
|
||||
}
|
||||
} else if ("IBAN".equals(local) && pathContains(path, "RltdPties")) {
|
||||
if (current.isCredit != null && current.isCredit && pathContains(path, "DbtrAcct")) {
|
||||
current.counterpartyIban = text;
|
||||
} else if (current.isCredit != null && !current.isCredit && pathContains(path, "CdtrAcct")) {
|
||||
current.counterpartyIban = text;
|
||||
}
|
||||
} else if ("Ntry".equals(local)) {
|
||||
// Commit entry.
|
||||
if (current.bookingDate != null && current.amountCents != null
|
||||
&& current.isCredit != null) {
|
||||
int signed = current.isCredit ? current.amountCents : -current.amountCents;
|
||||
LocalDate value = current.valueDate != null ? current.valueDate : current.bookingDate;
|
||||
transactions.add(new ParsedTransaction(
|
||||
current.bookingDate, value, signed,
|
||||
current.currency != null ? current.currency : "EUR",
|
||||
current.referenceText, current.counterpartyName,
|
||||
current.counterpartyIban, current.bankReference));
|
||||
} else {
|
||||
warnings.add("CAMT.053: skipped Ntry with missing required fields "
|
||||
+ "(bookingDate=" + current.bookingDate + ", amount=" + current.amountCents
|
||||
+ ", credit=" + current.isCredit + ")");
|
||||
}
|
||||
current = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Pop the path AFTER all the directChildOf/pathContains checks above
|
||||
// have used the still-pushed element as their context.
|
||||
path.pop();
|
||||
chars.setLength(0);
|
||||
}
|
||||
default -> { /* skip comments, PIs, whitespace events between elements */ }
|
||||
}
|
||||
}
|
||||
} catch (XMLStreamException e) {
|
||||
throw new BankStatementParseException("CAMT.053 XML error in " + filename + ": " + e.getMessage(), e);
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
try { reader.close(); } catch (XMLStreamException ignored) { }
|
||||
}
|
||||
}
|
||||
|
||||
log.debug("CAMT.053 parsed: file={} transactions={} warnings={}",
|
||||
filename, transactions.size(), warnings.size());
|
||||
return new ParseResult(transactions, accountIban, statementDate,
|
||||
openingBalanceCents, closingBalanceCents, warnings);
|
||||
}
|
||||
|
||||
/** Convert a decimal-dot amount string (ISO 20022 standard) to cents. */
|
||||
static int parseAmountToCents(String amount) {
|
||||
// ISO 20022 amounts use a period as decimal separator and never a thousands separator.
|
||||
// Examples: "50.00", "1234.56", "0.99", "100" (no fraction).
|
||||
BigDecimal value = new BigDecimal(amount.strip());
|
||||
return value.movePointRight(2).setScale(0, RoundingMode.HALF_UP).intValueExact();
|
||||
}
|
||||
|
||||
private static LocalDate tryParseDate(String text) {
|
||||
if (text == null || text.isEmpty()) return null;
|
||||
try {
|
||||
// CAMT uses xs:date (YYYY-MM-DD) for <Dt> and xs:dateTime for <DtTm>.
|
||||
// We accept either by stripping any time portion.
|
||||
int tIdx = text.indexOf('T');
|
||||
String dateOnly = tIdx > 0 ? text.substring(0, tIdx) : text;
|
||||
return LocalDate.parse(dateOnly, ISO_DATE);
|
||||
} catch (RuntimeException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Is the current path inside a {@code <Bal>} subtree? */
|
||||
private static boolean insideBal(Deque<String> path) {
|
||||
return pathContains(path, "Bal");
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the second-from-top of the path the given element? Used to disambiguate
|
||||
* {@code <Amt>} as a direct child of {@code <Ntry>} from {@code <Amt>} inside
|
||||
* a nested {@code <TxDtls>}.
|
||||
* <p>
|
||||
* Note: top of the path is the current element itself, so the parent is index 1.
|
||||
*/
|
||||
private static boolean directChildOf(Deque<String> path, String parent) {
|
||||
if (path.size() < 2) return false;
|
||||
var it = path.iterator();
|
||||
it.next(); // skip self
|
||||
return parent.equals(it.next());
|
||||
}
|
||||
|
||||
private static boolean pathContains(Deque<String> path, String element) {
|
||||
for (String p : path) {
|
||||
if (element.equals(p)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Mutable holder accumulating one entry across many StAX events. */
|
||||
private static final class EntryAccumulator {
|
||||
LocalDate bookingDate;
|
||||
LocalDate valueDate;
|
||||
Integer amountCents;
|
||||
String currency;
|
||||
Boolean isCredit;
|
||||
String referenceText;
|
||||
String counterpartyName;
|
||||
String counterpartyIban;
|
||||
String bankReference;
|
||||
}
|
||||
|
||||
/** Mutable holder for an in-progress {@code <Bal>} subtree. */
|
||||
private static final class BalanceAccumulator {
|
||||
String code; // OPBD, CLBD, ITBD, PRCD, …
|
||||
Integer amountCents;
|
||||
boolean creditIndicator = true;
|
||||
LocalDate date;
|
||||
}
|
||||
}
|
||||
+273
@@ -0,0 +1,273 @@
|
||||
package de.cannamanage.service.bankimport;
|
||||
|
||||
import de.cannamanage.domain.entity.CsvColumnMapping;
|
||||
import de.cannamanage.domain.enums.BankFormat;
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVParser;
|
||||
import org.apache.commons.csv.CSVRecord;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Sprint 10 — Parser for generic CSV bank statement exports.
|
||||
* <p>
|
||||
* Unlike MT940 and CAMT.053, CSV has no standardized layout — every bank uses
|
||||
* its own column order, encoding, and number format. This parser is configured
|
||||
* per-import via a {@link CsvColumnMapping} entity which the admin sets up once
|
||||
* per bank (typical names: "Sparkasse Export", "DKB Online", "ING Umsätze").
|
||||
* <p>
|
||||
* Configurable aspects (all from {@link CsvColumnMapping}):
|
||||
* <ul>
|
||||
* <li><strong>Encoding</strong> — defaults to ISO-8859-1 (the German banking
|
||||
* standard); UTF-8 and Windows-1252 are also common</li>
|
||||
* <li><strong>Delimiter</strong> — typically {@code ;} for German exports,
|
||||
* {@code ,} for English-locale tools</li>
|
||||
* <li><strong>Skip header rows</strong> — banks often emit 1-5 metadata rows
|
||||
* before the actual transaction header</li>
|
||||
* <li><strong>Column indices</strong> — 0-based positions of date, amount,
|
||||
* reference, counterparty name, IBAN; reference/counterparty/IBAN are optional</li>
|
||||
* <li><strong>Date format</strong> — any pattern compatible with {@link DateTimeFormatter}</li>
|
||||
* <li><strong>Decimal separator</strong> — {@code ,} (German) or {@code .} (English).
|
||||
* The opposite character is treated as a thousands separator and stripped.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Sign convention: many CSV exports use a single signed amount column; some use
|
||||
* separate "Soll"/"Haben" columns. The current implementation supports only the
|
||||
* signed-amount style — a future enhancement can add a {@code creditDebitColumn}
|
||||
* field to {@link CsvColumnMapping} if a customer needs the two-column variant.
|
||||
*/
|
||||
@Component
|
||||
public class CsvBankParser implements BankStatementParser {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(CsvBankParser.class);
|
||||
|
||||
@Override
|
||||
public BankFormat getSupportedFormat() {
|
||||
return BankFormat.CSV;
|
||||
}
|
||||
|
||||
/**
|
||||
* Acts as the fallback detector — if it's not XML and not MT940, we try CSV.
|
||||
* The detection logic in {@link BankStatementParserService} runs CSV last so
|
||||
* we don't accidentally claim MT940 or CAMT files that happen to have a {@code .csv}
|
||||
* extension by mistake.
|
||||
*/
|
||||
@Override
|
||||
public boolean canParse(String filename, byte[] headerBytes) {
|
||||
if (filename == null || headerBytes == null) return false;
|
||||
String lower = filename.toLowerCase();
|
||||
// Primary signal: file extension. Bank CSV exports are reliably named.
|
||||
if (lower.endsWith(".csv") || lower.endsWith(".txt")) {
|
||||
return true;
|
||||
}
|
||||
// Secondary: content looks delimited and is NOT XML.
|
||||
String head = new String(headerBytes, 0, Math.min(headerBytes.length, 256),
|
||||
StandardCharsets.ISO_8859_1);
|
||||
if (head.contains("<?xml") || head.trim().startsWith("<")) return false;
|
||||
// Looks for at least one likely delimiter on the first line.
|
||||
int nl = head.indexOf('\n');
|
||||
String firstLine = nl >= 0 ? head.substring(0, nl) : head;
|
||||
return firstLine.contains(";") || firstLine.contains("\t") || firstLine.contains(",");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseResult parse(InputStream inputStream, String filename, CsvColumnMapping mapping) {
|
||||
if (mapping == null) {
|
||||
throw new BankStatementParseException(
|
||||
"CSV parser requires a CsvColumnMapping (filename=" + filename + ")");
|
||||
}
|
||||
Charset charset = resolveCharset(mapping.getEncoding());
|
||||
char delimiter = resolveDelimiter(mapping.getDelimiter());
|
||||
int skipRows = mapping.getSkipHeaderRows() != null ? mapping.getSkipHeaderRows() : 0;
|
||||
DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern(mapping.getDateFormat());
|
||||
char decimalSep = mapping.getDecimalSeparator() != null && !mapping.getDecimalSeparator().isEmpty()
|
||||
? mapping.getDecimalSeparator().charAt(0)
|
||||
: ',';
|
||||
|
||||
List<ParsedTransaction> transactions = new ArrayList<>();
|
||||
List<String> warnings = new ArrayList<>();
|
||||
|
||||
CSVFormat format = CSVFormat.Builder.create()
|
||||
.setDelimiter(delimiter)
|
||||
.setQuote('"')
|
||||
.setIgnoreEmptyLines(true)
|
||||
.setTrim(true)
|
||||
.setAllowMissingColumnNames(true)
|
||||
.build();
|
||||
|
||||
try (Reader reader = new InputStreamReader(inputStream, charset);
|
||||
CSVParser parser = format.parse(reader)) {
|
||||
|
||||
int rowNumber = 0;
|
||||
for (CSVRecord record : parser) {
|
||||
rowNumber++;
|
||||
// Skip configurable header rows (banks vary: 0 if header-less, 1 for column
|
||||
// names, up to 5 for full metadata preambles).
|
||||
if (rowNumber <= skipRows) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
ParsedTransaction tx = parseRow(record, mapping, dateFormatter, decimalSep);
|
||||
if (tx != null) transactions.add(tx);
|
||||
} catch (RuntimeException e) {
|
||||
warnings.add("CSV row " + rowNumber + ": " + e.getMessage());
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new BankStatementParseException("CSV read error in " + filename, e);
|
||||
}
|
||||
|
||||
log.debug("CSV parsed: file={} mapping={} transactions={} warnings={}",
|
||||
filename, mapping.getName(), transactions.size(), warnings.size());
|
||||
// CSV exports rarely carry statement-level metadata (no IBAN/balance), so the
|
||||
// top-level fields stay null. The matching engine doesn't depend on them.
|
||||
return new ParseResult(transactions, null, null, null, null, warnings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse one CSV record into a {@link ParsedTransaction}. Returns {@code null}
|
||||
* if the row should be silently skipped (e.g. blank required field on what
|
||||
* appears to be a non-data row like a sub-total).
|
||||
*/
|
||||
private ParsedTransaction parseRow(CSVRecord record, CsvColumnMapping mapping,
|
||||
DateTimeFormatter dateFormatter, char decimalSep) {
|
||||
// Defensive column access — banks sometimes emit short rows for sub-totals.
|
||||
String dateText = getColumn(record, mapping.getDateColumn());
|
||||
String amountText = getColumn(record, mapping.getAmountColumn());
|
||||
if (dateText == null || dateText.isEmpty() || amountText == null || amountText.isEmpty()) {
|
||||
// Likely a sub-total row or a continuation line we should ignore.
|
||||
return null;
|
||||
}
|
||||
|
||||
LocalDate bookingDate = LocalDate.parse(dateText, dateFormatter);
|
||||
int amountCents = parseAmount(amountText, decimalSep);
|
||||
|
||||
String referenceText = mapping.getReferenceColumn() != null
|
||||
? nullIfEmpty(getColumn(record, mapping.getReferenceColumn())) : null;
|
||||
String counterpartyName = mapping.getCounterpartyColumn() != null
|
||||
? nullIfEmpty(getColumn(record, mapping.getCounterpartyColumn())) : null;
|
||||
String counterpartyIban = mapping.getIbanColumn() != null
|
||||
? normalizeIban(getColumn(record, mapping.getIbanColumn())) : null;
|
||||
|
||||
// No separate value date in most CSV exports — use booking date.
|
||||
// No separate bank reference in most CSV exports.
|
||||
return new ParsedTransaction(
|
||||
bookingDate, bookingDate, amountCents, "EUR",
|
||||
referenceText, counterpartyName, counterpartyIban, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a German- or English-locale amount string to cents.
|
||||
* <p>
|
||||
* Examples (decimalSep='{@literal ,}'):
|
||||
* <ul>
|
||||
* <li>{@code "1.234,56"} → 123456</li>
|
||||
* <li>{@code "-30,00"} → -3000</li>
|
||||
* <li>{@code "100"} → 10000</li>
|
||||
* <li>{@code "0,5"} → 50</li>
|
||||
* </ul>
|
||||
* The character opposite to {@code decimalSep} is treated as the thousands
|
||||
* separator and stripped. A leading "+" is tolerated.
|
||||
*/
|
||||
static int parseAmount(String text, char decimalSep) {
|
||||
String s = text.strip();
|
||||
// Strip currency symbol or stray "+" prefix.
|
||||
if (s.startsWith("+")) s = s.substring(1);
|
||||
// The "other" separator is the thousands separator and is discarded.
|
||||
char thousandsSep = (decimalSep == ',') ? '.' : ',';
|
||||
StringBuilder cleaned = new StringBuilder(s.length());
|
||||
boolean negative = false;
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
char c = s.charAt(i);
|
||||
if (c == '-') {
|
||||
negative = true;
|
||||
} else if (c == thousandsSep || Character.isWhitespace(c)) {
|
||||
// discard
|
||||
} else if (c == decimalSep) {
|
||||
cleaned.append('.');
|
||||
} else if (Character.isDigit(c) || c == '.') {
|
||||
// After the swap above, '.' here would only occur if decimalSep is itself '.'
|
||||
cleaned.append(c);
|
||||
}
|
||||
// Any other character (currency symbol, letters) is silently dropped — they
|
||||
// appear in some exports (e.g. trailing "EUR") and are harmless.
|
||||
}
|
||||
String numericPart = cleaned.toString();
|
||||
if (numericPart.isEmpty() || ".".equals(numericPart)) {
|
||||
throw new IllegalArgumentException("amount has no digits: \"" + text + "\"");
|
||||
}
|
||||
int dotIdx = numericPart.indexOf('.');
|
||||
String euros;
|
||||
String fract;
|
||||
if (dotIdx < 0) {
|
||||
euros = numericPart;
|
||||
fract = "00";
|
||||
} else {
|
||||
euros = numericPart.substring(0, dotIdx);
|
||||
fract = numericPart.substring(dotIdx + 1);
|
||||
if (fract.length() == 1) {
|
||||
fract += "0";
|
||||
} else if (fract.length() > 2) {
|
||||
fract = fract.substring(0, 2); // truncate, never round — exact cents only
|
||||
} else if (fract.isEmpty()) {
|
||||
fract = "00";
|
||||
}
|
||||
}
|
||||
if (euros.isEmpty()) euros = "0";
|
||||
int absCents = Integer.parseInt(euros) * 100 + Integer.parseInt(fract);
|
||||
return negative ? -absCents : absCents;
|
||||
}
|
||||
|
||||
private static String getColumn(CSVRecord record, Integer idx) {
|
||||
if (idx == null || idx < 0 || idx >= record.size()) return null;
|
||||
return record.get(idx);
|
||||
}
|
||||
|
||||
private static String nullIfEmpty(String s) {
|
||||
return (s == null || s.isEmpty()) ? null : s;
|
||||
}
|
||||
|
||||
private static String normalizeIban(String raw) {
|
||||
if (raw == null) return null;
|
||||
String clean = raw.replaceAll("\\s", "").toUpperCase();
|
||||
return clean.isEmpty() ? null : clean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the encoding string from the mapping into a {@link Charset}.
|
||||
* Falls back to ISO-8859-1 on unknown/invalid values rather than failing — the
|
||||
* import session is already in progress and a slightly mangled name field is
|
||||
* preferable to a hard failure.
|
||||
*/
|
||||
private Charset resolveCharset(String name) {
|
||||
if (name == null || name.isEmpty()) return StandardCharsets.ISO_8859_1;
|
||||
try {
|
||||
return Charset.forName(name);
|
||||
} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
|
||||
log.warn("CSV: unknown encoding '{}', falling back to ISO-8859-1", name);
|
||||
return StandardCharsets.ISO_8859_1;
|
||||
}
|
||||
}
|
||||
|
||||
private char resolveDelimiter(String s) {
|
||||
if (s == null || s.isEmpty()) return ';';
|
||||
// Support escape sequence "\t" for tab — a common delimiter that's hard to type
|
||||
// in a UI form.
|
||||
if ("\\t".equals(s) || "\t".equals(s)) return '\t';
|
||||
return s.charAt(0);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,523 @@
|
||||
package de.cannamanage.service.bankimport;
|
||||
|
||||
import de.cannamanage.domain.entity.CsvColumnMapping;
|
||||
import de.cannamanage.domain.enums.BankFormat;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.LocalDate;
|
||||
import java.time.MonthDay;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Sprint 10 — Parser for SWIFT MT940 bank statements.
|
||||
* <p>
|
||||
* MT940 is a line-oriented text format used by virtually all German banks for
|
||||
* legacy account statement exports. Each "tag" starts with {@code :NN:} or
|
||||
* {@code :NNL:} (e.g. {@code :20:}, {@code :60F:}, {@code :61:}, {@code :86:}).
|
||||
* <p>
|
||||
* Tags we care about:
|
||||
* <ul>
|
||||
* <li>{@code :20:} — Transaction reference, marks start of the SWIFT block.
|
||||
* Everything before this is treated as a proprietary header and skipped.</li>
|
||||
* <li>{@code :25:} — Account identification (BLZ/account or IBAN, bank-dependent)</li>
|
||||
* <li>{@code :60F:}/{@code :60M:} — Opening balance (first/intermediate)</li>
|
||||
* <li>{@code :61:} — Statement line (one per transaction)</li>
|
||||
* <li>{@code :86:} — Information to account owner ("Verwendungszweck"), may
|
||||
* span multiple lines and carry sub-fields like {@code SVWZ+}, {@code EREF+}</li>
|
||||
* <li>{@code :62F:}/{@code :62M:} — Closing balance (final/intermediate)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Proprietary headers:</strong> Tools like StarMoney, WISO Mein Geld, and
|
||||
* Hibiscus often wrap the SWIFT content with proprietary header lines (version
|
||||
* markers, account exports, BOM bytes). We tolerate this by skipping everything
|
||||
* up to the first {@code :20:} tag.
|
||||
* <p>
|
||||
* <strong>Encoding:</strong> MT940 is technically a 7-bit ASCII format but German
|
||||
* banks routinely use ISO-8859-1 for umlauts in {@code :86:} fields. We decode
|
||||
* with ISO-8859-1 which is a strict superset of ASCII and safe for both cases.
|
||||
*/
|
||||
@Component
|
||||
public class Mt940Parser implements BankStatementParser {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(Mt940Parser.class);
|
||||
|
||||
/**
|
||||
* Century boundary for the 2-digit year in {@code :61:} dates.
|
||||
* Per German banking convention: YY in {@code [00, 70)} → 20YY, YY in {@code [70, 99]} → 19YY.
|
||||
* This handles legacy statements from the 1990s while correctly interpreting all
|
||||
* 21st-century dates up to 2069.
|
||||
*/
|
||||
static final int CENTURY_BOUNDARY = 70;
|
||||
|
||||
private static final DateTimeFormatter MMDD = DateTimeFormatter.ofPattern("MMdd");
|
||||
|
||||
/** Matches any line starting with a SWIFT tag, capturing the tag name and the payload. */
|
||||
private static final Pattern TAG_LINE = Pattern.compile("^:(\\d{2}[A-Z]?):(.*)$");
|
||||
|
||||
/**
|
||||
* Matches the entry-line ({@code :61:}) header up to and including the amount.
|
||||
* Layout: YYMMDD [MMDD entry-date] (C|D|RC|RD|EC|ED) [funds-code] amount-with-comma rest.
|
||||
*/
|
||||
private static final Pattern ENTRY_LINE = Pattern.compile(
|
||||
"^(\\d{6})" // 1: value date YYMMDD
|
||||
+ "(\\d{4})?" // 2: optional booking date MMDD
|
||||
+ "(RC|RD|EC|ED|C|D)" // 3: debit/credit indicator (longer alternatives first!)
|
||||
+ "([A-Z])?" // 4: optional funds/currency code
|
||||
+ "(\\d+(?:,\\d{0,2})?)" // 5: amount with comma (cents optional, banks sometimes omit)
|
||||
+ "(.*)$" // 6: rest (transaction type, bank reference, etc.)
|
||||
);
|
||||
|
||||
@Override
|
||||
public BankFormat getSupportedFormat() {
|
||||
return BankFormat.MT940;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canParse(String filename, byte[] headerBytes) {
|
||||
if (headerBytes == null || headerBytes.length == 0) {
|
||||
return false;
|
||||
}
|
||||
// MT940 is 7-bit ASCII for the structural part; ISO-8859-1 is safe for any preamble.
|
||||
String head = new String(headerBytes, StandardCharsets.ISO_8859_1);
|
||||
// Strong signal: the :20: tag is mandatory and identifies the start of an MT940 block.
|
||||
// We do NOT require :60F: because some proprietary exports lack it,
|
||||
// but the combination of :20: + at least one of {:25:, :61:, :60F:} is solid.
|
||||
if (!head.contains(":20:")) {
|
||||
return false;
|
||||
}
|
||||
return head.contains(":25:") || head.contains(":61:") || head.contains(":60F:");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ParseResult parse(InputStream inputStream, String filename, CsvColumnMapping mapping) {
|
||||
List<ParsedTransaction> transactions = new ArrayList<>();
|
||||
List<String> warnings = new ArrayList<>();
|
||||
|
||||
String accountIban = null;
|
||||
Integer openingBalanceCents = null;
|
||||
Integer closingBalanceCents = null;
|
||||
LocalDate statementDate = null;
|
||||
LocalDate referenceYearAnchor = null; // used to infer year for :61: entries
|
||||
|
||||
// State machine: which tag's continuation lines are we currently in?
|
||||
// MT940 tags can wrap: a :86: tag's content continues on subsequent lines that
|
||||
// don't themselves start with :NN:.
|
||||
String currentTag = null;
|
||||
StringBuilder currentPayload = new StringBuilder();
|
||||
// Entry-line state — when we close a :61: + optional :86: block, emit a transaction.
|
||||
Mt940EntryBuilder pendingEntry = null;
|
||||
boolean inSwiftBlock = false; // becomes true once we've seen the first :20:
|
||||
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(inputStream, StandardCharsets.ISO_8859_1))) {
|
||||
|
||||
String line;
|
||||
int lineNo = 0;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
lineNo++;
|
||||
// Strip trailing CR (MT940 files are often CRLF) and any leading BOM-ish noise.
|
||||
line = stripBom(line).stripTrailing();
|
||||
if (line.isEmpty() || "-".equals(line)) {
|
||||
// "-" is the SWIFT block-end marker; blank lines are decorative.
|
||||
continue;
|
||||
}
|
||||
|
||||
Matcher tagMatch = TAG_LINE.matcher(line);
|
||||
if (tagMatch.matches()) {
|
||||
// Flush the previous tag before processing this one.
|
||||
if (currentTag != null) {
|
||||
pendingEntry = handleTag(
|
||||
currentTag, currentPayload.toString(),
|
||||
transactions, warnings, pendingEntry, referenceYearAnchor);
|
||||
}
|
||||
currentTag = tagMatch.group(1);
|
||||
currentPayload.setLength(0);
|
||||
currentPayload.append(tagMatch.group(2));
|
||||
|
||||
if (!inSwiftBlock) {
|
||||
if ("20".equals(currentTag)) {
|
||||
inSwiftBlock = true;
|
||||
} else {
|
||||
// Still in the proprietary header — drop the tag and continue scanning.
|
||||
warnings.add("MT940: skipped pre-:20: line " + lineNo + " (proprietary header)");
|
||||
currentTag = null;
|
||||
currentPayload.setLength(0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Side-effects we capture on tag dispatch (need data BEFORE further parsing).
|
||||
if ("25".equals(currentTag)) {
|
||||
accountIban = extractIbanFromAccount(tagMatch.group(2));
|
||||
} else if ("60F".equals(currentTag) || "60M".equals(currentTag)) {
|
||||
BalanceParse bal = parseBalanceTag(tagMatch.group(2));
|
||||
if (bal != null) {
|
||||
openingBalanceCents = bal.amountCents;
|
||||
if (bal.date != null) {
|
||||
referenceYearAnchor = bal.date;
|
||||
if (statementDate == null) {
|
||||
statementDate = bal.date;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if ("62F".equals(currentTag) || "62M".equals(currentTag)) {
|
||||
BalanceParse bal = parseBalanceTag(tagMatch.group(2));
|
||||
if (bal != null) {
|
||||
closingBalanceCents = bal.amountCents;
|
||||
if (bal.date != null) {
|
||||
statementDate = bal.date; // closing date wins
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Continuation of the current tag — append with a newline for :86:
|
||||
// (so multi-line Verwendungszweck retains breaks), space-joined otherwise.
|
||||
if (currentTag != null) {
|
||||
if ("86".equals(currentTag)) {
|
||||
currentPayload.append('\n').append(line);
|
||||
} else {
|
||||
currentPayload.append(line);
|
||||
}
|
||||
}
|
||||
// Lines outside any tag (and outside the SWIFT block) are just header noise.
|
||||
}
|
||||
}
|
||||
// Flush trailing tag.
|
||||
if (currentTag != null) {
|
||||
pendingEntry = handleTag(
|
||||
currentTag, currentPayload.toString(),
|
||||
transactions, warnings, pendingEntry, referenceYearAnchor);
|
||||
}
|
||||
// Emit a pending entry that had no :86: continuation.
|
||||
if (pendingEntry != null) {
|
||||
transactions.add(pendingEntry.build(null));
|
||||
}
|
||||
|
||||
} catch (java.io.IOException e) {
|
||||
throw new BankStatementParseException("MT940 read error in " + filename, e);
|
||||
}
|
||||
|
||||
log.debug("MT940 parsed: file={} transactions={} warnings={}",
|
||||
filename, transactions.size(), warnings.size());
|
||||
return new ParseResult(transactions, accountIban, statementDate,
|
||||
openingBalanceCents, closingBalanceCents, warnings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Dispatch a fully accumulated tag to its handler. Returns the (possibly updated)
|
||||
* pending entry — {@code :61:} starts one, {@code :86:} completes it, anything else
|
||||
* resets it.
|
||||
*/
|
||||
private Mt940EntryBuilder handleTag(
|
||||
String tag, String payload,
|
||||
List<ParsedTransaction> transactions, List<String> warnings,
|
||||
Mt940EntryBuilder pendingEntry, LocalDate referenceYearAnchor) {
|
||||
switch (tag) {
|
||||
case "61" -> {
|
||||
// A new :61: starts a new entry — flush any unfinished one first.
|
||||
if (pendingEntry != null) {
|
||||
transactions.add(pendingEntry.build(null));
|
||||
}
|
||||
Mt940EntryBuilder builder = parseEntryLine(payload, referenceYearAnchor, warnings);
|
||||
return builder; // may be null if the line was unparseable
|
||||
}
|
||||
case "86" -> {
|
||||
// Completion of the most recent :61: — attach the Verwendungszweck.
|
||||
if (pendingEntry != null) {
|
||||
transactions.add(pendingEntry.build(payload));
|
||||
return null;
|
||||
}
|
||||
// :86: without preceding :61: is the "account information" tag at the file
|
||||
// level — not a transaction reference. Silently ignore.
|
||||
return null;
|
||||
}
|
||||
default -> {
|
||||
// Any other tag implicitly closes a pending entry.
|
||||
if (pendingEntry != null) {
|
||||
transactions.add(pendingEntry.build(null));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a {@code :61:} statement-line payload. Format (positional):
|
||||
* <pre>
|
||||
* YYMMDD[MMDD](C|D|RC|RD|EC|ED)[funds]AMOUNT[rest]
|
||||
* </pre>
|
||||
* Returns {@code null} if the payload doesn't match (logged as a warning).
|
||||
*/
|
||||
private Mt940EntryBuilder parseEntryLine(String payload, LocalDate referenceYearAnchor,
|
||||
List<String> warnings) {
|
||||
Matcher m = ENTRY_LINE.matcher(payload);
|
||||
if (!m.matches()) {
|
||||
warnings.add("MT940: unparseable :61: line: " + truncate(payload, 80));
|
||||
return null;
|
||||
}
|
||||
|
||||
String valueDateRaw = m.group(1); // YYMMDD
|
||||
String bookingDateRaw = m.group(2); // MMDD or null
|
||||
String indicator = m.group(3); // C, D, RC, RD, EC, ED
|
||||
String amountRaw = m.group(5); // "123,45"
|
||||
String rest = m.group(6); // type code + bank reference
|
||||
|
||||
LocalDate valueDate = parseSwiftDate(valueDateRaw);
|
||||
LocalDate bookingDate = bookingDateRaw != null
|
||||
? inferBookingDate(bookingDateRaw, valueDate)
|
||||
: valueDate;
|
||||
|
||||
int amountCents = parseAmountToCents(amountRaw);
|
||||
// RC/RD = reversal — flips the sign vs. its base C/D indicator.
|
||||
boolean isDebit = "D".equals(indicator) || "ED".equals(indicator) || "RC".equals(indicator);
|
||||
// Note: "RC" = Reversal of a Credit = effectively a debit; "RD" = reversal of a debit = credit.
|
||||
if ("RD".equals(indicator)) isDebit = false;
|
||||
int signedAmount = isDebit ? -amountCents : amountCents;
|
||||
|
||||
String bankReference = extractBankReference(rest);
|
||||
|
||||
return new Mt940EntryBuilder(bookingDate, valueDate, signedAmount, bankReference);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a YYMMDD date applying the {@link #CENTURY_BOUNDARY} rule.
|
||||
*/
|
||||
static LocalDate parseSwiftDate(String yymmdd) {
|
||||
int yy = Integer.parseInt(yymmdd.substring(0, 2));
|
||||
int mm = Integer.parseInt(yymmdd.substring(2, 4));
|
||||
int dd = Integer.parseInt(yymmdd.substring(4, 6));
|
||||
int year = (yy >= CENTURY_BOUNDARY) ? 1900 + yy : 2000 + yy;
|
||||
return LocalDate.of(year, mm, dd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Booking date is MMDD only — infer the year from the value date.
|
||||
* Most cases: same year. Edge case: value date in early January, booking date in late
|
||||
* December (or vice versa) — choose the year that puts the booking date within 30 days
|
||||
* of the value date.
|
||||
*/
|
||||
private LocalDate inferBookingDate(String mmdd, LocalDate valueDate) {
|
||||
MonthDay md = MonthDay.parse(mmdd, MMDD);
|
||||
LocalDate sameYear = md.atYear(valueDate.getYear());
|
||||
long deltaSameYear = Math.abs(sameYear.toEpochDay() - valueDate.toEpochDay());
|
||||
if (deltaSameYear <= 30) {
|
||||
return sameYear;
|
||||
}
|
||||
// Try previous and next year, pick the closer one.
|
||||
LocalDate prevYear = md.atYear(valueDate.getYear() - 1);
|
||||
LocalDate nextYear = md.atYear(valueDate.getYear() + 1);
|
||||
long deltaPrev = Math.abs(prevYear.toEpochDay() - valueDate.toEpochDay());
|
||||
long deltaNext = Math.abs(nextYear.toEpochDay() - valueDate.toEpochDay());
|
||||
return deltaPrev <= deltaNext ? prevYear : nextYear;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a SWIFT amount string ("1234,56" or "1234,5" or "1234") to cents.
|
||||
* SWIFT uses comma as decimal separator and never has a thousands separator.
|
||||
*/
|
||||
static int parseAmountToCents(String amount) {
|
||||
int commaIdx = amount.indexOf(',');
|
||||
if (commaIdx < 0) {
|
||||
return Integer.parseInt(amount) * 100;
|
||||
}
|
||||
String euros = amount.substring(0, commaIdx);
|
||||
String fract = amount.substring(commaIdx + 1);
|
||||
if (fract.isEmpty()) {
|
||||
return Integer.parseInt(euros) * 100;
|
||||
}
|
||||
if (fract.length() == 1) {
|
||||
fract += "0";
|
||||
} else if (fract.length() > 2) {
|
||||
fract = fract.substring(0, 2); // truncate, not round — banks always quote exact cents
|
||||
}
|
||||
return Integer.parseInt(euros) * 100 + Integer.parseInt(fract);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a balance tag payload ({@code :60F:}/{@code :60M:}/{@code :62F:}/{@code :62M:}).
|
||||
* Layout: (D|C) YYMMDD CCC AMOUNT.
|
||||
*/
|
||||
private BalanceParse parseBalanceTag(String payload) {
|
||||
if (payload == null || payload.length() < 10) return null;
|
||||
try {
|
||||
char sign = payload.charAt(0);
|
||||
LocalDate date = parseSwiftDate(payload.substring(1, 7));
|
||||
// Currency code is positions 7-10, we don't currently surface it on balances.
|
||||
int amountCents = parseAmountToCents(payload.substring(10));
|
||||
int signed = (sign == 'D') ? -amountCents : amountCents;
|
||||
return new BalanceParse(date, signed);
|
||||
} catch (RuntimeException e) {
|
||||
log.debug("Unparseable balance tag payload: {}", payload, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Heuristic IBAN extraction from a {@code :25:} payload.
|
||||
* Bank-account formats vary: some put "BLZ/account" (10-digit BLZ + slash + account),
|
||||
* others put the IBAN directly. We accept anything that looks like a German IBAN.
|
||||
*/
|
||||
private String extractIbanFromAccount(String payload) {
|
||||
if (payload == null) return null;
|
||||
String clean = payload.replaceAll("\\s", "").toUpperCase();
|
||||
// German IBAN: DE + 20 digits = 22 chars total
|
||||
Matcher m = Pattern.compile("(DE\\d{20})").matcher(clean);
|
||||
return m.find() ? m.group(1) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the bank's own transaction reference from the trailing part of {@code :61:}.
|
||||
* Format: {@code <typeCode>//<bankRef>[\n<supplementary>]}. The typeCode is 4 chars
|
||||
* (e.g. {@code NMSC}, {@code NTRF}), the bankRef is up to 16 chars after the {@code //}.
|
||||
*/
|
||||
private String extractBankReference(String rest) {
|
||||
if (rest == null) return null;
|
||||
int slashIdx = rest.indexOf("//");
|
||||
if (slashIdx < 0) return null;
|
||||
String ref = rest.substring(slashIdx + 2);
|
||||
int newlineIdx = ref.indexOf('\n');
|
||||
if (newlineIdx > 0) ref = ref.substring(0, newlineIdx);
|
||||
ref = ref.strip();
|
||||
return ref.isEmpty() ? null : ref;
|
||||
}
|
||||
|
||||
/** Remove a leading UTF-8 BOM (0xFEFF) if present after charset decoding. */
|
||||
private String stripBom(String s) {
|
||||
if (!s.isEmpty() && s.charAt(0) == '\uFEFF') {
|
||||
return s.substring(1);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
private static String truncate(String s, int max) {
|
||||
return s == null || s.length() <= max ? s : s.substring(0, max) + "…";
|
||||
}
|
||||
|
||||
/** Mutable builder for an in-flight :61:+:86: pair. */
|
||||
private static final class Mt940EntryBuilder {
|
||||
final LocalDate bookingDate;
|
||||
final LocalDate valueDate;
|
||||
final int amountCents;
|
||||
final String bankReference;
|
||||
|
||||
Mt940EntryBuilder(LocalDate bookingDate, LocalDate valueDate,
|
||||
int amountCents, String bankReference) {
|
||||
this.bookingDate = bookingDate;
|
||||
this.valueDate = valueDate;
|
||||
this.amountCents = amountCents;
|
||||
this.bankReference = bankReference;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the final transaction, parsing the optional :86: payload into
|
||||
* (referenceText, counterpartyName, counterpartyIban).
|
||||
*/
|
||||
ParsedTransaction build(String tag86Payload) {
|
||||
String referenceText = null;
|
||||
String counterpartyName = null;
|
||||
String counterpartyIban = null;
|
||||
|
||||
if (tag86Payload != null && !tag86Payload.isEmpty()) {
|
||||
// :86: structure varies by bank. The most common format uses {@code ?NN}
|
||||
// subfield markers (?20-?29 = Verwendungszweck, ?32/?33 = Name, ?31 = IBAN).
|
||||
// The newer SVWZ+/EREF+/CRED+/DEBT+ format (SEPA) embeds tagged values.
|
||||
Mt86Parsed parsed = parseTag86(tag86Payload);
|
||||
referenceText = parsed.referenceText;
|
||||
counterpartyName = parsed.counterpartyName;
|
||||
counterpartyIban = parsed.counterpartyIban;
|
||||
}
|
||||
|
||||
return new ParsedTransaction(
|
||||
bookingDate, valueDate, amountCents, "EUR",
|
||||
referenceText, counterpartyName, counterpartyIban, bankReference);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a multi-line :86: payload into its three semantic parts.
|
||||
* Supports both legacy {@code ?NN} subfield format and SEPA-style {@code SVWZ+}/{@code EREF+} tags.
|
||||
*/
|
||||
private static Mt86Parsed parseTag86(String payload) {
|
||||
// Normalize: collapse line breaks within the payload (subfields can wrap across lines).
|
||||
String collapsed = payload.replace("\n", "");
|
||||
|
||||
String referenceText = null;
|
||||
String counterpartyName = null;
|
||||
String counterpartyIban = null;
|
||||
|
||||
// Try the ?NN subfield format first. Pattern: ?20...?29 = Verwendungszweck (concat),
|
||||
// ?32 + ?33 = name, ?31 = counterparty IBAN, ?30 = counterparty BIC.
|
||||
if (collapsed.contains("?")) {
|
||||
StringBuilder svwz = new StringBuilder();
|
||||
StringBuilder name = new StringBuilder();
|
||||
// Split on ? but keep the marker by using a lookahead.
|
||||
String[] parts = collapsed.split("(?=\\?\\d{2})");
|
||||
for (String part : parts) {
|
||||
if (part.length() < 3 || part.charAt(0) != '?') continue;
|
||||
String key = part.substring(1, 3);
|
||||
String val = part.substring(3);
|
||||
int code;
|
||||
try { code = Integer.parseInt(key); } catch (NumberFormatException e) { continue; }
|
||||
if (code >= 20 && code <= 29) {
|
||||
svwz.append(val);
|
||||
} else if (code == 32 || code == 33) {
|
||||
name.append(val);
|
||||
} else if (code == 31) {
|
||||
counterpartyIban = val.replaceAll("\\s", "");
|
||||
}
|
||||
}
|
||||
if (svwz.length() > 0) referenceText = svwz.toString().strip();
|
||||
if (name.length() > 0) counterpartyName = name.toString().strip();
|
||||
}
|
||||
|
||||
// SEPA tagged format inside the Verwendungszweck. Extract embedded references if present.
|
||||
if (referenceText != null) {
|
||||
// Strip the inner SVWZ+ prefix if present so the visible reference is clean.
|
||||
int svwzIdx = referenceText.indexOf("SVWZ+");
|
||||
if (svwzIdx >= 0) {
|
||||
String svwzPart = referenceText.substring(svwzIdx + 5);
|
||||
int nextTagIdx = findNextSepaTag(svwzPart);
|
||||
referenceText = (nextTagIdx >= 0 ? svwzPart.substring(0, nextTagIdx) : svwzPart).strip();
|
||||
}
|
||||
} else if (collapsed.contains("SVWZ+")) {
|
||||
// No ?NN subfields, only SEPA tags.
|
||||
int svwzIdx = collapsed.indexOf("SVWZ+");
|
||||
String svwzPart = collapsed.substring(svwzIdx + 5);
|
||||
int nextTagIdx = findNextSepaTag(svwzPart);
|
||||
referenceText = (nextTagIdx >= 0 ? svwzPart.substring(0, nextTagIdx) : svwzPart).strip();
|
||||
}
|
||||
|
||||
// If nothing matched any structured format, treat the whole thing as the reference.
|
||||
if (referenceText == null && !collapsed.contains("?")) {
|
||||
referenceText = collapsed.strip();
|
||||
}
|
||||
|
||||
return new Mt86Parsed(referenceText, counterpartyName, counterpartyIban);
|
||||
}
|
||||
|
||||
/** Find the next SEPA tag (SVWZ+, EREF+, KREF+, MREF+, CRED+, DEBT+, ABWA+, ABWE+, IBAN+, BIC+). */
|
||||
private static int findNextSepaTag(String s) {
|
||||
String[] tags = {"EREF+", "KREF+", "MREF+", "CRED+", "DEBT+", "ABWA+", "ABWE+",
|
||||
"IBAN+", "BIC+", "SVWZ+"};
|
||||
int earliest = -1;
|
||||
for (String t : tags) {
|
||||
int idx = s.indexOf(t);
|
||||
if (idx >= 0 && (earliest < 0 || idx < earliest)) earliest = idx;
|
||||
}
|
||||
return earliest;
|
||||
}
|
||||
|
||||
/**
 * The three values extracted from a :86: payload. Each component may be {@code null}
 * when the payload did not provide it.
 */
private record Mt86Parsed(
        String referenceText,
        String counterpartyName,
        String counterpartyIban
) {}
|
||||
|
||||
/**
 * A date together with an amount in cents.
 * NOTE(review): presumably the result of parsing a balance line; confirm against the caller.
 */
private record BalanceParse(
        LocalDate date,
        int amountCents
) {}
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
package de.cannamanage.service.bankimport;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Sprint 10 — Aggregated result of parsing one bank statement file.
|
||||
* <p>
|
||||
* Carries the parsed transactions plus statement-level metadata (account IBAN,
|
||||
* opening/closing balances) which is used for downstream sanity checks
|
||||
* (e.g. verifying that "sum of transactions" matches "closing − opening").
|
||||
* <p>
|
||||
* {@link #warnings} captures non-fatal parsing issues — fields that were missing,
|
||||
* lines that were skipped, unexpected format variants. These are surfaced in the
|
||||
* UI but don't abort the import.
|
||||
*
|
||||
* @param transactions parsed transactions, in source-file order
|
||||
* @param accountIban account this statement belongs to (may be {@code null} for CSV)
|
||||
* @param statementDate end date of the statement period (or transaction date for single-line statements)
|
||||
* @param openingBalanceCents opening balance in cents — signed, optional
|
||||
* @param closingBalanceCents closing balance in cents — signed, optional
|
||||
* @param warnings non-fatal parser warnings; empty list if clean
|
||||
*/
|
||||
public record ParseResult(
|
||||
List<ParsedTransaction> transactions,
|
||||
String accountIban,
|
||||
LocalDate statementDate,
|
||||
Integer openingBalanceCents,
|
||||
Integer closingBalanceCents,
|
||||
List<String> warnings
|
||||
) {}
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
package de.cannamanage.service.bankimport;
|
||||
|
||||
import java.time.LocalDate;
|
||||
|
||||
/**
 * Sprint 10 — One transaction that a {@link BankStatementParser} extracted from a bank statement.
 * <p>
 * {@link #amountCents} is signed: a <strong>positive</strong> value is an incoming
 * booking (a potential member payment), a <strong>negative</strong> value is an outgoing
 * booking (an expense).
 * <p>
 * Instances exist only in memory during parsing. The import orchestrator later turns
 * each one into a persisted {@code BankTransaction} entity.
 *
 * @param bookingDate      date on which the bank posted the transaction
 * @param valueDate        date on which the funds became available (may equal {@code bookingDate})
 * @param amountCents      signed transaction amount in cents
 * @param currency         ISO 4217 currency code; almost always {@code "EUR"} for German banks
 * @param referenceText    the "Verwendungszweck": the concatenated free-text reference fields
 * @param counterpartyName name of the payer (for credits) or the payee (for debits)
 * @param counterpartyIban IBAN of the payer/payee, if available
 * @param bankReference    the bank's own transaction reference (EREF/KREF/MREF for SEPA,
 *                         NtryRef for CAMT)
 */
public record ParsedTransaction(
        LocalDate bookingDate,
        LocalDate valueDate,
        int amountCents,
        String currency,
        String referenceText,
        String counterpartyName,
        String counterpartyIban,
        String bankReference
) {}
|
||||
+1
-1
@@ -165,7 +165,7 @@ public class MemberListRegistryGenerator implements ReportGenerator<DateRangeRep
|
||||
return switch (status) {
|
||||
case ACTIVE -> "Aktiv";
|
||||
case SUSPENDED -> "Gesperrt";
|
||||
case RESIGNED -> "Ausgetreten";
|
||||
case RESIGNED, LEFT -> "Ausgetreten";
|
||||
case EXPELLED -> "Ausgeschlossen";
|
||||
case PENDING_APPROVAL -> "Aufnahme ausstehend";
|
||||
};
|
||||
|
||||
+38
@@ -0,0 +1,38 @@
|
||||
package de.cannamanage.service.repository;
|
||||
|
||||
import de.cannamanage.domain.entity.BankImportSession;
|
||||
import de.cannamanage.domain.enums.ImportSessionStatus;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Repository for {@link BankImportSession} (Sprint 10).
|
||||
* <p>
|
||||
* All queries are tenant-scoped automatically by Hibernate's {@code @Filter}
|
||||
* defined on {@code AbstractTenantEntity}, so {@code clubId} parameters are
|
||||
* an additional safety filter within a tenant rather than a security boundary.
|
||||
*/
|
||||
@Repository
|
||||
public interface BankImportSessionRepository extends JpaRepository<BankImportSession, UUID> {
|
||||
|
||||
/** Import history for a club, most recent first — drives the import history page. */
|
||||
List<BankImportSession> findByClubIdOrderByCreatedAtDesc(UUID clubId);
|
||||
|
||||
/** Used by the "Resume Import" banner: list of sessions that need finishing. */
|
||||
List<BankImportSession> findByClubIdAndStatusOrderByCreatedAtDesc(UUID clubId, ImportSessionStatus status);
|
||||
|
||||
/**
|
||||
* Duplicate-import guard: returns a recent session for this club with the same filename,
|
||||
* created after the given cutoff. Used to warn the admin before re-importing the same file.
|
||||
*/
|
||||
Optional<BankImportSession> findFirstByClubIdAndFilenameAndCreatedAtAfter(
|
||||
UUID clubId, String filename, Instant cutoff);
|
||||
|
||||
/** Tier-limit enforcement: count Starter-plan imports in the current month. */
|
||||
long countByClubIdAndCreatedAtAfter(UUID clubId, Instant since);
|
||||
}
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
package de.cannamanage.service.repository;
|
||||
|
||||
import de.cannamanage.domain.entity.BankTransaction;
|
||||
import de.cannamanage.domain.enums.MatchStatus;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Repository for {@link BankTransaction} (Sprint 10).
|
||||
* <p>
|
||||
* Tenant-scoping is applied automatically by Hibernate {@code @Filter}.
|
||||
*/
|
||||
@Repository
|
||||
public interface BankTransactionRepository extends JpaRepository<BankTransaction, UUID> {
|
||||
|
||||
/** All transactions of a session, in booking order — drives the review table. */
|
||||
List<BankTransaction> findBySessionIdOrderByBookingDateAsc(UUID sessionId);
|
||||
|
||||
/** Filter by review status — for the "Matched / Suggested / Unmatched / Skipped" tabs. */
|
||||
List<BankTransaction> findBySessionIdAndMatchStatus(UUID sessionId, MatchStatus matchStatus);
|
||||
|
||||
/** Counters for session-level statistics displayed in the wizard. */
|
||||
long countBySessionIdAndMatchStatus(UUID sessionId, MatchStatus matchStatus);
|
||||
}
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
package de.cannamanage.service.repository;
|
||||
|
||||
import de.cannamanage.domain.entity.CsvColumnMapping;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Repository for {@link CsvColumnMapping} (Sprint 10).
|
||||
* <p>
|
||||
* Tenant-scoping is applied automatically by Hibernate {@code @Filter}.
|
||||
*/
|
||||
@Repository
|
||||
public interface CsvColumnMappingRepository extends JpaRepository<CsvColumnMapping, UUID> {
|
||||
|
||||
/** All saved CSV mappings for a club — drives the template dropdown in the upload wizard. */
|
||||
List<CsvColumnMapping> findByClubId(UUID clubId);
|
||||
|
||||
/** Default mapping (if any) — pre-selected in the upload wizard. */
|
||||
Optional<CsvColumnMapping> findByClubIdAndIsDefaultTrue(UUID clubId);
|
||||
|
||||
/** Tier-limit enforcement: Pro plan = max 3 templates per club. */
|
||||
long countByClubId(UUID clubId);
|
||||
}
|
||||
Reference in New Issue
Block a user