Files
archie/tests/integration_test.py
Mario Fetka 1e4baef047 Port Archie 3.5 to Linux/CMake, add Debian packaging and CI
- Replace autoconf/make build system with CMake (installs to /opt/archie)
- Add CPack DEB packaging for Debian Trixie (non-free/net, postinst creates
  archie user, extracts DB skeleton, sets setuid bits, enables systemd units)
- Add Gitea Actions workflow building .deb + binary/source tarballs on tag push
- Add portable archie_init.py for non-Debian post-install setup
- Port all scripts to Linux: getent passwd, systemctl, tail -n +N, gzip
- Add SFTP (libssh2) and FTPS (OpenSSL) scrapers alongside anonftp
- Add Flask web frontend (archie-web.service)
- Fix filter scripts (exec cat replaces broken sed s///g)
- Update all manpages: paths, contacts, add SFTP/FTPS section
- Update etc/: enable gzip, add webindex catalog, fix localhost refs
- Remove: AIX-2/SunOS-4.1.4/SunOS-5.4 dirs, tcl7.6/, tcl-dp/, tk4.2/,
  berkdb/, old Makefile.in/pre/post fragments, build.sh, unwrap scripts
- Add .gitignore

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-22 23:05:12 +02:00

427 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Archie 3.5 Integration Test
Tests the full pipeline: FTP/FTPS/SFTP server connectivity + archie DB pipeline
"""
import os
import sys
import subprocess
import tempfile
import shutil
import socket
import time
import datetime
import signal
import atexit
import ftplib
import threading
# Directory holding this test script, and the source-tree root one level up.
_HERE = os.path.dirname(os.path.abspath(__file__))
_SRC = os.path.normpath(os.path.join(_HERE, '..'))

# ARCHIE_BUILD_DIR can be set by ctest (CMAKE_BINARY_DIR); when it is absent
# the build tree is assumed to live in <source root>/build_test.
_DEFAULT_BUILD_DIR = os.path.join(_SRC, 'build_test')
BUILD_DIR = os.environ.get('ARCHIE_BUILD_DIR', _DEFAULT_BUILD_DIR)

# Helper servers live next to this script; sources and scripts hang off the
# source-tree root.
SERVERS_DIR = os.path.join(_HERE, 'servers')
ARCHIE_SRC = os.path.join(_SRC, 'archie')
ARCHIE_SCRIPTS_DIR = os.path.join(_SRC, 'scripts')
# Tool paths
def tool(name):
    """Return the path of the built binary *name* inside BUILD_DIR.

    The known output sub-directories are searched in a fixed order and the
    first regular file found wins.

    Raises:
        FileNotFoundError: if *name* is in none of the sub-directories.
    """
    search_dirs = (
        'archie/tools',
        'archie/anonftp/update',
        'archie/anonftp/parse',
        'archie/anonftp/retrieve',
        'archie/clients/cgi',
        'archie/clients/telnet',
    )
    candidates = (os.path.join(BUILD_DIR, d, name) for d in search_dirs)
    found = next((c for c in candidates if os.path.isfile(c)), None)
    if found is None:
        raise FileNotFoundError(f"Tool not found: {name}")
    return found
# (status, message) tuples recorded by ok()/fail(), summarised at the end.
RESULTS = []
# Helper server processes started by this script; stopped at interpreter exit.
PROCS = []


def cleanup_procs():
    """Stop every process in PROCS (best effort, never raises).

    Each process is asked to terminate and given 3 seconds to exit. If that
    fails for any reason it is killed and then waited on, so the child is
    actually reaped instead of being left behind un-waited.
    """
    for proc in PROCS:
        try:
            proc.terminate()
            proc.wait(timeout=3)
        except Exception:
            # Deliberately broad: this runs at exit and must not mask the
            # script's real exit status with a secondary error.
            try:
                proc.kill()
                proc.wait(timeout=3)
            except Exception:
                pass


atexit.register(cleanup_procs)
def ok(msg):
    """Report a passing check: echo it to stdout and record it in RESULTS."""
    print(f" [PASS] {msg}")
    RESULTS.append(('PASS', msg))
def fail(msg, detail=''):
    """Report a failing check.

    The failure is recorded in RESULTS under *msg*; *detail*, when non-empty,
    is printed on a following line but is not stored.
    """
    RESULTS.append(('FAIL', msg))
    report = [f" [FAIL] {msg}"]
    if detail:
        report.append(f" {detail}")
    print("\n".join(report))
def section(title):
    """Print a banner: blank line, 60-char rule, the title, another rule."""
    rule = '=' * 60
    print(f"\n{rule}\n {title}\n{rule}")
def wait_for_port(host, port, timeout=10):
    """Poll until a TCP connection to (host, port) succeeds or time runs out.

    Args:
        host: Host name or address to connect to.
        port: TCP port number.
        timeout: Overall budget in seconds. A value <= 0 means no attempt
            is made and False is returned immediately.

    Returns:
        True as soon as one connection attempt succeeds, False otherwise.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments
    # (NTP steps, manual changes) that could stretch or cut the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # The context manager closes the probe socket on every path.
            with socket.create_connection((host, port), timeout=0.5):
                return True
        except OSError:
            time.sleep(0.2)
    return False
# ─── 1. Smoke test: all binaries present ─────────────────────────
section("1. Binary existence check")

REQUIRED_TOOLS = [
    'db_build', 'db_check', 'db_dump', 'db_stats',
    'ardomains', 'dump_hostdb', 'fix_start_db',
]

# Flatten the (build sub-directory -> binary names) table into
# (name, full path) pairs, keeping the declaration order for the report.
_expected_binaries = [
    (binary, os.path.join(BUILD_DIR, build_subdir, binary))
    for build_subdir, binaries in (
        ('archie/tools', REQUIRED_TOOLS),
        ('archie/anonftp/update', ['check_anonftp', 'delete_anonftp', 'insert_anonftp',
                                   'net_anonftp', 'update_anonftp']),
        ('archie/anonftp/parse', ['parse_anonftp', 'parse_anonftp_unix_bsd']),
        ('archie/anonftp/retrieve', ['retrieve_anonftp', 'retrieve_anonsftp',
                                     'retrieve_anonftps']),
        ('archie/clients/cgi', ['cgi-client']),
        ('archie/clients/telnet', ['telnet-client']),
    )
    for binary in binaries
]

for binary, binary_path in _expected_binaries:
    if not os.path.isfile(binary_path):
        fail(f"{binary} missing", binary_path)
    else:
        ok(f"{binary} exists")
# ─── 2. Start test servers ─────────────────────────────────────────
section("2. Start test servers")

FTP_PORT = 2121
FTPS_PORT = 2122
SFTP_PORT = 2223
FTP_ROOT = '/tmp/archie-ftp-root'
os.makedirs(FTP_ROOT, exist_ok=True)


def start_server(script, *extra_args):
    """Launch a helper server from SERVERS_DIR in the background.

    Args:
        script: File name of the server script inside SERVERS_DIR.
        *extra_args: Command-line arguments passed through to the script.

    Returns:
        The subprocess.Popen handle. It is also appended to PROCS so that
        cleanup_procs() stops it at interpreter exit. The server's
        stdout/stderr are discarded.
    """
    p = subprocess.Popen(
        [sys.executable, os.path.join(SERVERS_DIR, script)] + list(extra_args),
        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
    PROCS.append(p)
    return p


ftp_proc = start_server('ftp_server.py', '--port', str(FTP_PORT), '--root', FTP_ROOT)
ftps_proc = start_server('ftps_server.py', '--port', str(FTPS_PORT), '--root', FTP_ROOT)
sftp_proc = start_server('sftp_server.py', '--port', str(SFTP_PORT), '--root', FTP_ROOT)

for name, port, proc in [('FTP', FTP_PORT, ftp_proc),
                         ('FTPS', FTPS_PORT, ftps_proc),
                         ('SFTP', SFTP_PORT, sftp_proc)]:
    port_open = wait_for_port('127.0.0.1', port, timeout=15)
    rc = proc.poll()
    # An open port alone is not proof that *our* server is up: the ports are
    # fixed, so a stale process from an earlier run may be answering while
    # the child we just started has already exited. Require both.
    if port_open and rc is None:
        ok(f"{name} server ready on port {port}")
    else:
        fail(f"{name} server failed to start", f"exit={rc}" if rc is not None else "timeout")
# ─── 3. Populate test data ────────────────────────────────────────
section("3. Populate test data")

# Run the populate helper with the current interpreter against the root
# directory shared by all three servers.
_populate_cmd = [
    sys.executable,
    os.path.join(SERVERS_DIR, 'populate_test_data.py'),
    '--root', FTP_ROOT,
]
pop = subprocess.run(_populate_cmd, capture_output=True, text=True)
if pop.returncode != 0:
    # Only the first 200 characters of stderr are shown to keep output short.
    fail("populate_test_data failed", pop.stderr[:200])
else:
    ok("Test data populated in FTP/SFTP root")
# ─── 4. FTP connectivity test ─────────────────────────────────────
section("4. FTP server connectivity")

# Anonymous login followed by a directory listing of /pub.
try:
    # The context manager guarantees the control connection is closed even
    # when login or LIST raises; the explicit quit() keeps a failing QUIT
    # visible as a test failure on the success path.
    with ftplib.FTP() as ftp:
        ftp.connect('127.0.0.1', FTP_PORT, timeout=5)
        ftp.login('anonymous', 'test@example.com')
        listing = []
        ftp.retrlines('LIST /pub', listing.append)
        ftp.quit()
    if listing:
        ok(f"FTP anonymous login + LIST works ({len(listing)} entries)")
    else:
        fail("FTP LIST returned empty listing")
except Exception as e:
    # Broad on purpose: any error here is reported as a failed check.
    fail("FTP connectivity failed", str(e))

# testuser login
try:
    with ftplib.FTP() as ftp:
        ftp.connect('127.0.0.1', FTP_PORT, timeout=5)
        ftp.login('testuser', 'testpass')
        ftp.quit()
    ok("FTP testuser login works")
except Exception as e:
    fail("FTP testuser login failed", str(e))
# ─── 5. FTPS connectivity test ────────────────────────────────────
section("5. FTPS server connectivity")

try:
    import ssl

    # Certificate and hostname verification are switched off for this
    # connection to the local test server.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE

    # The context manager closes the connection on every path, including
    # failures during login, PROT P negotiation or LIST.
    with ftplib.FTP_TLS(context=ctx) as ftp:
        ftp.connect('127.0.0.1', FTPS_PORT, timeout=5)
        ftp.login('testuser', 'testpass')
        ftp.prot_p()  # switch the data channel to TLS before listing
        listing = []
        ftp.retrlines('LIST /pub', listing.append)
        ftp.quit()
    ok(f"FTPS login + LIST works ({len(listing)} entries)")
except Exception as e:
    # Broad on purpose: any error here is reported as a failed check.
    fail("FTPS connectivity failed", str(e))
# ─── 6. SFTP connectivity test ────────────────────────────────────
section("6. SFTP server connectivity")

try:
    # Imported here so that a missing paramiko is reported as a failed check
    # instead of aborting the whole script at import time.
    import paramiko

    ssh = paramiko.SSHClient()
    try:
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect('127.0.0.1', port=SFTP_PORT, username='testuser',
                    password='testpass', timeout=5)
        sftp = ssh.open_sftp()
        try:
            entries = sftp.listdir('/pub')
        finally:
            sftp.close()
    finally:
        # Always release the SSH connection, even when connect/listdir fails.
        ssh.close()
    ok(f"SFTP login + listdir works ({len(entries)} entries)")
except Exception as e:
    # Broad on purpose: any error here is reported as a failed check.
    fail("SFTP connectivity failed", str(e))
# ─── 7. Archie DB pipeline test ───────────────────────────────────
section("7. Archie DB pipeline (parse + insert + query)")

# Throw-away installation root for the pipeline test; removed at exit.
ARCHIE_HOME = tempfile.mkdtemp(prefix='archie_test_')


def archie_cleanup():
    """Delete the temporary ARCHIE_HOME tree, ignoring any removal errors."""
    shutil.rmtree(ARCHIE_HOME, ignore_errors=True)


atexit.register(archie_cleanup)

# Create directory structure matching archie's expectations
# DEFAULT_HOST_DB_DIR = "./host", DB_SUFFIX = "_db" → db/./host_db = db/host_db
_ARCHIE_SUBDIRS = ('db', 'db/host_db', 'etc', 'anonftp', 'tmp',
                   'incoming', 'locks', 'bin', 'logs')
for _subdir in _ARCHIE_SUBDIRS:
    os.makedirs(os.path.join(ARCHIE_HOME, _subdir), exist_ok=True)

_etc_dir = os.path.join(ARCHIE_HOME, 'etc')

# archie.hostname: a single line holding this machine's host name.
hostname = socket.gethostname()
with open(os.path.join(_etc_dir, 'archie.hostname'), 'w') as _fh:
    _fh.write(f"{hostname}\n")

# arretdefs.cf: a single anonftp/unix_bsd entry.
_arretdefs_entry = (
    f"anonftp:unix_bsd:I:.Z:anonymous:archie@{hostname}:::ls-lR:ls-lR.Z\n"
)
with open(os.path.join(_etc_dir, 'arretdefs.cf'), 'w') as _fh:
    _fh.write(_arretdefs_entry)
# Install filter_anonftp_unix_bsd (from scripts/, shebang already fixed)
filter_src = os.path.join(ARCHIE_SCRIPTS_DIR, 'filter_anonftp_unix_bsd')
filter_dst = os.path.join(ARCHIE_HOME, 'bin', 'filter_anonftp_unix_bsd')
if os.path.isfile(filter_src):
    # Copy in binary mode: a text-mode round trip with errors='replace' would
    # silently rewrite any byte that is not valid in the locale encoding.
    with open(filter_src, 'rb') as f:
        content = f.read()
    # Ensure Linux-compatible shebang (scripts/ should already have it fixed)
    content = content.replace(b'#!/usr/tgcware/bin/perl', b'#!/usr/bin/env perl', 1)
    with open(filter_dst, 'wb') as f:
        f.write(content)
    os.chmod(filter_dst, 0o755)
    ok("Installed filter_anonftp_unix_bsd to ARCHIE_HOME/bin/")
else:
    fail("filter_anonftp_unix_bsd not found in scripts/", filter_src)

# Install parse_anonftp_unix_bsd (built binary)
try:
    parse_bsd_src = tool('parse_anonftp_unix_bsd')
    parse_bsd_dst = os.path.join(ARCHIE_HOME, 'bin', 'parse_anonftp_unix_bsd')
    shutil.copy2(parse_bsd_src, parse_bsd_dst)
    os.chmod(parse_bsd_dst, 0o755)
    ok("Installed parse_anonftp_unix_bsd to ARCHIE_HOME/bin/")
except FileNotFoundError as e:
    fail("parse_anonftp_unix_bsd not found in build", str(e))
# Initialize the DB (db_build needs db/host_db/ to already exist)
# ARCH_USER is set to the invoking user ($USER, falling back to 'mario');
# per the original author's note, log-dir errors from the tool are non-fatal.
try:
    db_build_exe = tool('db_build')
except FileNotFoundError as e:
    # Record a failed check instead of aborting the script before the summary.
    fail("db_build not found in build", str(e))
else:
    r = subprocess.run(
        [db_build_exe, '-M', os.path.join(ARCHIE_HOME, 'db')],
        capture_output=True, text=True,
        env={**os.environ, 'ARCH_USER': os.environ.get('USER', 'mario'),
             'HOME': ARCHIE_HOME}
    )
    if r.returncode == 0:
        ok("db_build initialized master database")
    else:
        fail("db_build failed", (r.stdout + r.stderr)[:400])
# Create a synthetic .parse file (retrieve_anonftp output format):
# header block + ls-lR style directory listing
# Timestamp in UTC as YYYYMMDDHHMMSS. datetime.utcnow() is deprecated since
# Python 3.12; an aware "now" in UTC formats to exactly the same string.
NOW = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
TEST_HOST = '127.0.0.1'
PARSE_FILE = os.path.join(ARCHIE_HOME, 'incoming', f'{TEST_HOST}.anonftp.parse')
LS_LR = """\
.:
total 4
drwxr-xr-x 3 root root 4096 Jun 21 12:00 pub
./pub:
total 8
drwxr-xr-x 2 root root 4096 Jun 21 12:00 linux
drwxr-xr-x 2 root root 4096 Jun 21 12:00 gnu
./pub/linux:
total 12
-rw-r--r-- 1 root root 102400 Jun 21 12:00 kernel-5.15.0.tar.gz
-rw-r--r-- 1 root root 81920 Jun 20 08:30 kernel-5.14.0.tar.gz
./pub/gnu:
total 8
-rw-r--r-- 1 root root 51200 Jun 21 10:00 bash-5.2.tar.gz
-rw-r--r-- 1 root root 40960 Jun 19 14:22 gcc-13.1.tar.gz
"""

# Header records, one "key value" pair per line between the begin/end markers.
_header_lines = [
    "@header_begin",
    "generated_by retrieve",
    f"source_archie_hostname {hostname}",
    f"primary_hostname {TEST_HOST}",
    f"preferred_hostname {TEST_HOST}",
    f"primary_ipaddr {TEST_HOST}",
    "access_method anonftp",
    "os_type unix_bsd",
    f"retrieve_time {NOW}",
    "current_status active",
    "update_status succeed",
    "action_status update",
    "format raw",
    "prospero_host no",
    "@header_end",
]
with open(PARSE_FILE, 'w') as f:
    f.write("\n".join(_header_lines) + "\n")
    f.write(LS_LR)
ok(f"Created synthetic .parse file: {os.path.basename(PARSE_FILE)}")
# Run parse_anonftp
# Use ARCH_USER pointing to a nonexistent user so get_archie_home() returns '.'
# Set cwd=ARCHIE_HOME so './bin/filter_anonftp_unix_bsd' resolves correctly
parse_out = os.path.join(ARCHIE_HOME, 'incoming', f'{TEST_HOST}.anonftp')
try:
    parse_exe = tool('parse_anonftp')
except FileNotFoundError as e:
    # Record a failed check instead of aborting the script before the summary.
    fail("parse_anonftp not found in build", str(e))
else:
    r = subprocess.run(
        [parse_exe,
         '-M', os.path.join(ARCHIE_HOME, 'db'),
         '-i', PARSE_FILE,
         '-o', parse_out,
         '-v'],
        capture_output=True, text=True,
        cwd=ARCHIE_HOME,
        env={**os.environ,
             'ARCH_USER': '_archie_test_no_user_',
             'HOME': ARCHIE_HOME}
    )
    if r.returncode == 0:
        ok("parse_anonftp succeeded")
    else:
        fail("parse_anonftp failed", (r.stdout + r.stderr)[:400])

    # List what parse_anonftp created: anything in incoming/ that mentions the
    # test host, other than the input file itself.
    created = [f for f in os.listdir(os.path.join(ARCHIE_HOME, 'incoming'))
               if TEST_HOST in f and f != os.path.basename(PARSE_FILE)]
    if created:
        ok(f"parse_anonftp created: {', '.join(sorted(created))}")
    else:
        fail("parse_anonftp created no output files")
# Run insert_anonftp (on each .insert file if any)
# Sorted so the files are processed (and reported) in a deterministic order.
insert_files = sorted(
    f for f in os.listdir(os.path.join(ARCHIE_HOME, 'incoming'))
    if f.endswith(('.insert', '.insert_t'))
)
if not insert_files:
    print(" [INFO] No .insert files produced (may require host DB setup via host_manage)")
else:
    # The binary is only looked up when there is work for it, and a missing
    # binary is recorded as a failed check rather than crashing the script.
    try:
        insert_exe = tool('insert_anonftp')
    except FileNotFoundError as e:
        fail("insert_anonftp not found in build", str(e))
    else:
        for inf in insert_files:
            r = subprocess.run(
                [insert_exe,
                 '-M', os.path.join(ARCHIE_HOME, 'db'),
                 '-i', os.path.join(ARCHIE_HOME, 'incoming', inf),
                 '-v'],
                capture_output=True, text=True,
                env={**os.environ, 'ARCH_USER': os.environ.get('USER', 'mario'),
                     'HOME': ARCHIE_HOME}
            )
            if r.returncode == 0:
                ok(f"insert_anonftp succeeded ({inf})")
            else:
                fail(f"insert_anonftp failed ({inf})", (r.stdout + r.stderr)[:400])
# cgi-client query test — send a query via stdin
query_input = (
    "query=kernel\n"
    "database=anonftp\n"
    "type=sub\n"
    "case=insensitive\n"
    "maxhits=10\n"
)
try:
    cgi_exe = tool('cgi-client')
except FileNotFoundError as e:
    # Record a failed check instead of aborting the script before the summary.
    fail("cgi-client not found in build", str(e))
else:
    try:
        r = subprocess.run(
            [cgi_exe, '-M', os.path.join(ARCHIE_HOME, 'db')],
            input=query_input, capture_output=True, text=True,
            env={**os.environ, 'ARCH_USER': os.environ.get('USER', 'mario'),
                 'HOME': ARCHIE_HOME},
            timeout=10
        )
    except subprocess.TimeoutExpired as e:
        # A hung client is a test failure, not a reason to lose the summary.
        fail("cgi-client failed", f"timed out after {e.timeout}s")
    else:
        # The case-insensitive search for "hits" also matches a "HITS=" line.
        if r.returncode == 0 and 'hits' in r.stdout.lower():
            ok("cgi-client query returned results")
            hits = [l for l in r.stdout.splitlines() if l.startswith('HITS=')]
            if hits:
                print(f" {hits[0]}")
        elif r.returncode == 0:
            # A clean exit without results is still accepted at this stage.
            ok("cgi-client ran (no results yet — DB may need full init via host_manage + arcontrol)")
            print(f" stdout: {r.stdout[:200]}")
        else:
            fail("cgi-client failed", (r.stdout + r.stderr)[:400])
# ─── Summary ──────────────────────────────────────────────────────
section("Summary")

# Tally the recorded (status, message) pairs.
_failed_messages = [message for status, message in RESULTS if status == 'FAIL']
passed = sum(status == 'PASS' for status, _ in RESULTS)
failed = len(_failed_messages)
total = len(RESULTS)

print(f"\n {passed}/{total} tests passed, {failed} failed\n")
if failed:
    print(" Failed tests:")
    for message in _failed_messages:
        print(f" - {message}")

# Exit status 0 only when nothing failed, so ctest/CI can detect failures.
sys.exit(1 if failed else 0)