Files
sar/.claude/skills/bmad-workflow-builder/scripts/extract-report-json.py
julian 17c08e6392 chore: initial monorepo scaffold + WDS Phase 1+2 artifacts
- Nx 22.7 monorepo (pnpm 11.1, TypeScript 5.9, Node 24)
- apps/api: NestJS 11 (CJS conforme CODING-RULES.md PGD-DB-004)
- apps/web: React 19 + Vite 8 (ESM)
- libs/shared/api-interface: Zod contract base
- Docker Compose dev: Postgres 18, Valkey 8, MinIO, Mailpit
- WDS artifacts:
  - design-artifacts/A-Product-Brief/ (5 docs canônicos + 16 dialogs)
  - design-artifacts/B-Trigger-Map/ (hub + 4 personas + feature impact)
- Stack canon: STACK.md v2.2 + CODING-RULES.md v2.0 + brand.md
- AGENTS.md + README.md como entrada para devs/agentes

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 14:34:20 +00:00

288 lines
12 KiB
Python

#!/usr/bin/env python3
"""Deterministic extraction of report-data.json from analysis outputs.

Reads scanner outputs (markdown + JSON) and extracts structured data without
LLM synthesis. Stated goals: no data loss and completion in <10 seconds.

NOTE(review): several extracted text fields are truncated by this script
(assessments to 500 characters, headless notes to 200, narrative to 150), so
the "no data loss" goal should be confirmed against those limits.

Usage:
    python3 extract-report-json.py {skill-path} {quality-report-dir} -o {output-file}
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
def extract_section(content: str, section_name: str, level: int = 2) -> str | None:
    """Return the body of the markdown section headed by *section_name*.

    The heading must use exactly ``level`` hash characters. The body starts on
    the line after the heading and runs up to the next heading of the same or
    a shallower level, or to the end of *content*.

    Returns:
        The section body with surrounding whitespace stripped, or ``None``
        when no matching heading is found.
    """
    hashes = str(level)
    # Heading line, lazily captured body, then a lookahead that stops at the
    # next heading of equal or higher rank (or at end of input).
    regex = ''.join([
        r'^#{', hashes, r'}\s+',
        re.escape(section_name),
        r'\s*\n(.*?)(?=^#{1,', hashes, r'}\s|\Z)',
    ])
    found = re.search(regex, content, flags=re.MULTILINE | re.DOTALL)
    if found is None:
        return None
    return found.group(1).strip()
def _extract_bullets(section: str, label_pattern: str) -> list[str]:
    """Return the ``- `` bullet items listed under a bold ``**label**`` line.

    *label_pattern* is a regex fragment for the label text (for example
    ``'Friction points?'``). The bullet block ends at the next bold marker
    line, at a line starting with a capital letter, or at the end of the
    section. An empty list is returned when the label is absent.
    """
    block = re.search(
        r'\*\*' + label_pattern + r'[:\*]*\*\*\s*\n(.*?)(?=\n\*\*|\n[A-Z]|$)',
        section,
        re.DOTALL,
    )
    if block is None:
        return []
    stripped_lines = (line.strip() for line in block.group(1).split('\n'))
    return [line[2:].strip() for line in stripped_lines if line.startswith('- ')]


def extract_journeys(content: str) -> list[dict]:
    """Extract user journey archetypes from enhancement-analysis.md.

    A journey is a level-3 heading of the form ``### N. {Name}: {Description}``
    together with everything up to the next ``###`` heading. Headings that do
    not carry a colon on the heading line are not journeys and are skipped.

    Returns:
        One dict per journey, in document order, with the keys ``archetype``
        (heading name), ``summary`` (leading narrative text, ``""`` when none
        is recognised), ``friction_points`` and ``bright_spots`` (lists of
        bullet texts).
    """
    journeys = []
    # The name may not span lines: without the ``\n`` exclusion a heading that
    # lacks a colon would swallow following lines up to the next colon found
    # anywhere in the document and yield a corrupted archetype.
    pattern = r'^###\s+\d+\.\s+([^:\n]+):\s+(.+?)(?=^###|\Z)'
    for match in re.finditer(pattern, content, re.MULTILINE | re.DOTALL):
        name = match.group(1).strip()
        section = match.group(2)

        # Summary: the first run of lines that is directly followed by a bold
        # marker line or by a line starting with a capital letter. A leading
        # "Narrative:" / "Narrative." label at the match start is excluded.
        narrative_match = re.search(
            r'(?:Narrative[:.]\s*)?([^\n]+(?:\n[^*\n][^\n]*)*?)(?=\n\*\*|\n[A-Z])',
            section,
        )
        summary = narrative_match.group(1).strip() if narrative_match else ""

        journeys.append({
            'archetype': name,
            'summary': summary,
            'friction_points': _extract_bullets(section, 'Friction points?'),
            'bright_spots': _extract_bullets(section, 'Bright spots?'),
        })
    return journeys
def extract_autonomous(content: str) -> dict:
    """Extract the headless/automation assessment from enhancement-analysis.md.

    Looks up the level-2 ``Headless Assessment`` section and searches it for a
    ``Current Level`` or ``Potential`` label.

    Returns:
        ``{}`` when the section is missing or empty. Otherwise a dict with
        ``potential`` (the text after the label up to the first newline or
        period, or ``"unknown"`` when no label is found) and ``notes`` (the
        text following the label match, or the whole section when there is no
        label, limited to 200 characters).
    """
    body = extract_section(content, 'Headless Assessment', level=2)
    if not body:
        return {}

    label_hit = re.search(r'(?:Current Level|Potential)[:\*]*\s*([^\n.]+)', body)
    if label_hit is None:
        potential = "unknown"
        notes = body
    else:
        potential = label_hit.group(1).strip()
        # Everything after the matched label/value becomes the notes.
        notes = body[label_hit.end():].strip()

    # Slicing an empty string yields "", so no separate emptiness check is needed.
    return {'potential': potential, 'notes': notes[:200]}
def _clip_at_next_heading(text: str) -> str:
    """Cut *text* at the first heading of level 1-3, if there is one."""
    boundary = re.search(r'^#{1,3}\s', text, re.MULTILINE)
    return text if boundary is None else text[:boundary.start()]


def _level4_titles(text: str) -> list[str]:
    """Return the non-empty titles of ``#### [N. ]Title`` headings in *text*."""
    titles = (
        match.group(2).strip()
        for match in re.finditer(r'^####\s+(\d+\.\s+)?(.+?)$', text, re.MULTILINE)
    )
    return [title for title in titles if title]


def extract_findings_from_md(content: str, source_scanner: str) -> list[dict]:
    """Extract individual findings from analysis markdown.

    Handles one format per scanner:

    - ``architecture``: ``### SEVERITY`` sections (CRITICAL/HIGH/MEDIUM/LOW)
      containing ``#### N. Title`` findings; the severity is the section name
      lower-cased. Sections whose body is empty or just ``None`` are skipped.
    - ``determinism``: ``### **[SEVERITY] Title**`` headings; the bracketed
      word is lower-cased and used as the severity.
    - ``customization``: ``### N. **Title** (OPPORTUNITY-TYPE)`` headings; the
      type maps to ``high`` / ``medium`` when it contains that word, else
      ``low``.
    - ``enhancement``: ``### LABEL Findings`` (or ``Opportunity`` /
      ``Opportunities``) sections containing ``#### N. Title`` findings; the
      label maps to ``high`` when it contains "high", ``medium`` when it
      contains "secondary" or "medium", else ``low``.

    For the two section-based formats a section ends at the next heading of
    level 1-3, so level-4 headings in later, unrelated sections are not
    attributed to the last severity section.

    Returns:
        A list of ``{'title', 'severity', 'source'}`` dicts in document order;
        an empty list for an unrecognised *source_scanner*.
    """
    findings: list[dict] = []

    def add(title: str, severity: str) -> None:
        findings.append({
            'title': title,
            'severity': severity,
            'source': source_scanner,
        })

    if source_scanner == 'architecture':
        # re.split with one capture group yields
        # [preamble, label1, body1, label2, body2, ...].
        parts = re.split(
            r'^###\s+(CRITICAL|HIGH|MEDIUM|LOW)\s*$', content, flags=re.MULTILINE
        )
        for label, body in zip(parts[1::2], parts[2::2]):
            body = _clip_at_next_heading(body)
            if body.strip() in ('', 'None'):
                continue
            severity = label.lower()
            for title in _level4_titles(body):
                add(title, severity)

    elif source_scanner == 'determinism':
        pattern = r'###\s+\*\*\[([A-Z]+)\]\s+([^*]+)\*\*'
        for match in re.finditer(pattern, content, re.MULTILINE):
            title = match.group(2).strip()
            if title:
                add(title, match.group(1).lower())

    elif source_scanner == 'customization':
        pattern = r'###\s+\d+\.\s+\*\*([^*]+)\*\*\s+\(([A-Z-]+)\)'
        for match in re.finditer(pattern, content, re.MULTILINE):
            title = match.group(1).strip()
            opportunity = match.group(2).lower()
            if 'high' in opportunity:
                severity = 'high'
            elif 'medium' in opportunity:
                severity = 'medium'
            else:
                severity = 'low'
            if title:
                add(title, severity)

    elif source_scanner == 'enhancement':
        # "Opportunit(?:y|ies)" accepts both singular and plural; the earlier
        # "Opportunities?" form could never match "Opportunity".
        parts = re.split(
            r'^###\s+([A-Z-]+)\s+(?:Findings|Opportunit(?:y|ies))',
            content,
            flags=re.MULTILINE,
        )
        for label, body in zip(parts[1::2], parts[2::2]):
            body = _clip_at_next_heading(body)
            if not body.strip():
                continue
            opportunity = label.lower()
            if 'high' in opportunity:
                severity = 'high'
            elif 'secondary' in opportunity or 'medium' in opportunity:
                # Same treatment of "medium" as the customization branch.
                severity = 'medium'
            else:
                severity = 'low'
            for title in _level4_titles(body):
                add(title, severity)

    return findings
def merge_prepass_data(report_dir: Path) -> dict:
    """Load every ``*-prepass.json`` file in *report_dir* and merge them.

    Files are processed in sorted path order so that, when two files define
    the same top-level key, the winner is the same on every run (``glob``
    itself gives no ordering guarantee). Later files override earlier ones.

    Merging stays best-effort: a file that cannot be read, is not valid JSON,
    or whose top-level value is not a JSON object is skipped with a warning on
    stderr instead of aborting the run.

    Returns:
        The merged top-level mapping; empty when nothing could be loaded.
    """
    merged: dict = {}
    for json_file in sorted(report_dir.glob('*-prepass.json')):
        try:
            data = json.loads(json_file.read_text(encoding='utf-8'))
        except (OSError, ValueError, RecursionError) as exc:
            # ValueError covers both JSON syntax errors and bad UTF-8.
            print(f'Warning: skipping {json_file.name}: {exc}', file=sys.stderr)
            continue
        if not isinstance(data, dict):
            # dict.update would accept e.g. a list of pairs and merge it
            # silently (or fail half-way); only JSON objects are mergeable.
            print(
                f'Warning: skipping {json_file.name}: top-level JSON value is not an object',
                file=sys.stderr,
            )
            continue
        merged.update(data)
    return merged
def build_report_json(skill_path: str, quality_report_dir: str) -> dict:
    """Assemble the report-data.json payload from the scanner markdown files.

    Reads the four ``*-analysis.md`` files (architecture, determinism,
    customization, enhancement) from *quality_report_dir*; a missing file is
    treated as empty content. For each scanner an assessment (first 500
    characters of the relevant level-2 section) and a findings list are
    extracted; the enhancement entry additionally carries journeys and the
    headless assessment.

    Returns:
        A dict with ``meta``, ``narrative``, ``grade``, ``broken``,
        ``opportunities``, ``strengths``, ``recommendations`` and
        ``detailed_analysis``. ``grade`` and the four list fields are
        placeholders left for the report creator.
    """
    report_dir = Path(quality_report_dir)
    skill_name = Path(skill_path).name
    timestamp = datetime.now(timezone.utc).isoformat()

    def read_optional(filename: str) -> str:
        # Absent analysis files simply contribute no content.
        candidate = report_dir / filename
        return candidate.read_text(encoding='utf-8') if candidate.exists() else ""

    def assessment(markdown: str, heading: str) -> str:
        return extract_section(markdown, heading, level=2) or ""

    architecture_md = read_optional('architecture-analysis.md')
    determinism_md = read_optional('determinism-analysis.md')
    customization_md = read_optional('customization-analysis.md')
    enhancement_md = read_optional('enhancement-analysis.md')

    # The enhancement summary feeds both the assessment and the narrative.
    enhancement_summary = assessment(enhancement_md, 'Summary')

    architecture_part = {
        'assessment': assessment(architecture_md, 'Assessment')[:500],  # First 500 chars
        'findings': extract_findings_from_md(architecture_md, 'architecture'),
    }
    determinism_part = {
        'assessment': assessment(determinism_md, 'Assessment')[:500],
        'findings': extract_findings_from_md(determinism_md, 'determinism'),
    }
    customization_part = {
        'assessment': assessment(customization_md, 'Overall Assessment')[:500],
        'posture': 'not-opted-in',  # Fixed value; not derived from the file
        'findings': extract_findings_from_md(customization_md, 'customization'),
    }
    enhancement_part = {
        'assessment': enhancement_summary[:500],
        'journeys': extract_journeys(enhancement_md),
        'autonomous': extract_autonomous(enhancement_md),
        'findings': extract_findings_from_md(enhancement_md, 'enhancement'),
    }

    # Minimal structure for now; the report creator expands it if needed.
    return {
        'meta': {
            'skill_name': skill_name,
            'skill_path': skill_path,
            'timestamp': timestamp,
            'scanner_count': 4,
        },
        'narrative': enhancement_summary[:150],  # Placeholder
        'grade': 'Good',  # Placeholder - report creator sets this
        'broken': [],
        'opportunities': [],
        'strengths': [],
        'recommendations': [],
        'detailed_analysis': {
            'architecture': architecture_part,
            'determinism': determinism_part,
            'customization': customization_part,
            'enhancement': enhancement_part,
        },
    }
def main():
    """Command-line entry point.

    Parses ``skill_path``, ``quality_report_dir`` and the optional
    ``-o/--output`` path, builds the report, writes it as UTF-8 JSON and
    prints a small success object on stdout. Progress and error messages go
    to stderr; any exception while building or writing exits with status 1.
    """
    cli = argparse.ArgumentParser(description='Extract report-data.json from analysis outputs')
    cli.add_argument('skill_path', help='Path to the skill being analyzed')
    cli.add_argument('quality_report_dir', help='Directory with analysis outputs and where to write report')
    cli.add_argument('-o', '--output', help='Output file path (default: {quality_report_dir}/report-data.json)')
    options = cli.parse_args()

    # Without -o the report lands next to the analysis outputs.
    if options.output:
        output_path = options.output
    else:
        output_path = str(Path(options.quality_report_dir) / 'report-data.json')

    try:
        payload = build_report_json(options.skill_path, options.quality_report_dir)
        serialized = json.dumps(payload, indent=2, ensure_ascii=False)
        Path(output_path).write_text(serialized, encoding='utf-8')
        print(f'Report JSON written to {output_path}', file=sys.stderr)
        status = {'status': 'success', 'output': output_path}
        print(json.dumps(status, indent=2))
    except Exception as e:
        print(f'Error: {e}', file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()