#!/usr/bin/env python3
"""Parse FUSION_API_index.html using regex to extract all GraphQL operations."""

import json
import re
from collections import Counter
from html import unescape

# Default locations; both can be overridden through the arguments of main().
HTML_PATH = '/Users/daveporter/Desktop/CODING-2024/DALIM-API/FUSION_API_index.html'
OUTPUT_PATH = '/Users/daveporter/Desktop/CODING-2024/DALIM-API/docs/api_parsed.json'

# NOTE(review): the literal HTML tags inside the patterns below (<section ...>,
# <p>, </table>, <tr>/<td>, </span>, <pre>) were not recoverable from the copy
# of this script that was reviewed and have been reconstructed from the
# surrounding fragments. Confirm each one against the real HTML document
# before trusting the extracted data.

# One match per operation; groups: (operation type, operation name, inner HTML).
_SECTION_RE = re.compile(
    r'<section\s+id="(query|mutation|subscription)-([^"]+)"'
    r'[^>]*class="operation[^"]*"[^>]*>(.*?)</section>',
    re.DOTALL,
)
_DESCRIPTION_RE = re.compile(
    r'class="operation-description[^"]*"[^>]*>.*?<p>(.*?)</p>', re.DOTALL
)
_RESPONSE_RE = re.compile(
    r'class="operation-response[^"]*"[^>]*>.*?Returns\s+(.*?)</p>', re.DOTALL
)
_ARGUMENTS_RE = re.compile(
    r'class="operation-arguments[^"]*"[^>]*>(.*?)</table>', re.DOTALL
)
# Two-cell table row; groups: (name/type cell, description cell).
_ROW_RE = re.compile(r'<tr>\s*<td>(.*?)</td>\s*<td>(.*?)</td>\s*</tr>', re.DOTALL)
_PROPERTY_NAME_RE = re.compile(r'class="property-name"[^>]*>(.*?)</span>', re.DOTALL)
_PROPERTY_TYPE_RE = re.compile(r'class="property-type"[^>]*>(.*?)</span>', re.DOTALL)


def _example_pattern(kind):
    """Build the pattern that captures the code block of one example kind."""
    return re.compile(
        r'class="[^"]*operation-' + kind + r'-example[^"]*"[^>]*>.*?<pre[^>]*>(.*?)</pre>',
        re.DOTALL,
    )


# Result key -> pattern. Insertion order is the key order of the JSON output.
_EXAMPLE_RES = {
    'example_query': _example_pattern('query'),
    'example_variables': _example_pattern('variables'),
    'example_response': _example_pattern('response'),
}


def strip_html(text: str) -> str:
    """Remove HTML tags, decode HTML entities and collapse whitespace.

    Tags are removed before entities are decoded so that an escaped
    ``&lt;foo&gt;`` survives as the literal text ``<foo>`` instead of being
    mistaken for a tag.
    """
    text = re.sub(r'<[^>]+>', '', text)
    text = unescape(text)
    return re.sub(r'\s+', ' ', text).strip()


def extract_code_text(html: str) -> str:
    """Extract text content from code blocks, stripping span tags."""
    return strip_html(html)


def _first_text(pattern, text, clean=strip_html):
    """Return the cleaned first capture group of *pattern* in *text*, or ''."""
    match = pattern.search(text)
    return clean(match.group(1)) if match else ''


def _parse_arguments(content):
    """Return the argument dicts found in an operation's arguments table."""
    table = _ARGUMENTS_RE.search(content)
    if not table:
        return []

    arguments = []
    for name_cell, desc_cell in _ROW_RE.findall(table.group(1)):
        arg_name = _first_text(_PROPERTY_NAME_RE, name_cell)
        if not arg_name:
            # Rows without a property name (e.g. headers) are not arguments.
            continue
        arg_type = _first_text(_PROPERTY_TYPE_RE, name_cell)
        arguments.append({
            'name': arg_name,
            'type': arg_type,
            'description': strip_html(desc_cell),
            # Required if the cell says so or the GraphQL type is non-null.
            'required': 'required' in name_cell.lower() or '!' in arg_type,
        })
    return arguments


def _parse_operation(op_type, op_name, content):
    """Build the result dict for a single operation section."""
    op = {
        'name': op_name,
        'type': op_type,
        'description': _first_text(_DESCRIPTION_RE, content),
        'response_type': _first_text(_RESPONSE_RE, content),
        'arguments': _parse_arguments(content),
    }
    for key, pattern in _EXAMPLE_RES.items():
        op[key] = _first_text(pattern, content, clean=extract_code_text)
    return op


def _print_summary(operations):
    """Print per-type counts and field coverage for the parsed operations."""
    by_type = Counter(op['type'] for op in operations)
    total = len(operations)

    print("\nResults:")
    print(f" Queries: {by_type['query']}")
    print(f" Mutations: {by_type['mutation']}")
    print(f" Subscriptions: {by_type['subscription']}")

    coverage = (
        ('description', 'description'),
        ('arguments', 'arguments'),
        ('response type', 'response_type'),
        ('example query', 'example_query'),
    )
    for label, key in coverage:
        filled = sum(1 for op in operations if op[key])
        print(f" With {label}: {filled}/{total}")


def _print_samples(operations):
    """Print a short preview of a few well-known operations, when present."""
    for name in ['activities', 'createProject', 'createUser', 'search', 'assets']:
        op = next((candidate for candidate in operations if candidate['name'] == name), None)
        if op is None:
            continue
        args = op['arguments']
        print(f"\n--- {op['type']} {op['name']} ---")
        print(f" Desc: {op['description'][:100]}")
        print(f" Response: {op['response_type']}")
        print(f" Args ({len(args)}):")
        for a in args[:3]:
            print(f" {a['name']}: {a['type']} {'(required)' if a['required'] else ''} - {a['description'][:60]}")
        if len(args) > 3:
            print(f" ... and {len(args)-3} more")


def main(html_path=HTML_PATH, output_path=OUTPUT_PATH):
    """Parse the API reference HTML and write the operations as JSON.

    Args:
        html_path: HTML document to read (UTF-8). Defaults to HTML_PATH.
        output_path: JSON file to write. Defaults to OUTPUT_PATH.

    Raises:
        OSError: if the input cannot be read or the output cannot be written.
    """
    print("Reading HTML file...")
    with open(html_path, 'r', encoding='utf-8') as f:
        html_text = f.read()
    print(f"HTML size: {len(html_text):,} bytes")

    # Extract all operation sections.
    sections = _SECTION_RE.findall(html_text)
    print(f"Found {len(sections)} operation sections")

    operations = [
        _parse_operation(op_type, op_name, content)
        for op_type, op_name, content in sections
    ]

    _print_summary(operations)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump({'operations': operations}, f, indent=2)
    print(f"\nSaved to {output_path}")

    _print_samples(operations)


if __name__ == '__main__':
    main()