#!/usr/bin/env python3
"""Parse FUSION_API_index.html using regex to extract all GraphQL operations."""
import re
import json
def strip_html(text):
    """Return the plain text of an HTML fragment.

    Args:
        text: HTML fragment as a string.

    Returns:
        The input with every ``<...>`` tag deleted, HTML character
        references (``&amp;``, ``&quot;``, ``&#39;``, ...) replaced by the
        characters they stand for, each run of whitespace squeezed to a
        single space, and leading/trailing whitespace trimmed.
    """
    # Function-scope import: ``html`` is also used as a variable/parameter
    # name in this file, so the module is not bound at file level.
    from html import unescape

    # Tags are removed before decoding so that escaped text such as
    # "List&lt;String&gt;" is not mistaken for markup once it is decoded.
    text = re.sub(r'<[^>]+>', '', text)
    text = unescape(text)
    # Whitespace is collapsed last so a decoded &nbsp; (U+00A0) is folded
    # into an ordinary space along with everything else matched by \s.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
def extract_code_text(html):
    """Return the text of a code-block HTML fragment.

    This is a thin alias: the fragment is handed to ``strip_html`` and
    whatever that returns is passed back unchanged.
    """
    cleaned = strip_html(html)
    return cleaned
def main():
# Script entry point: reads the saved API reference HTML, collects one dict per
# operation into `operations`, prints summary counts, writes the result as JSON,
# and prints a few sample operations. Input and output paths are hard-coded.
print("Reading HTML file...")
with open('/Users/daveporter/Desktop/CODING-2024/DALIM-API/FUSION_API_index.html', 'r', encoding='utf-8') as f:
html = f.read()
# The file is opened in text mode, so len(html) counts characters even though
# the message labels the figure as bytes.
print(f"HTML size: {len(html):,} bytes")
# Extract all operation sections
# Each section starts with
# NOTE(review): the next two lines look damaged -- HTML tag literals inside the
# comments and regex patterns appear to have been stripped, fusing several
# statements together. The statements that create `operations`, `op`,
# `content` and `query_example` are not visible here; restore them from the
# original script before relying on this function.
(.*?)
', content, re.DOTALL) if desc_match: op['description'] = strip_html(desc_match.group(1)) # Response type resp_match = re.search(r'class="operation-response[^"]*"[^>]*>.*?Returns\s+(.*?)', content, re.DOTALL) if resp_match: op['response_type'] = strip_html(resp_match.group(1)) # Arguments from table args_section = re.search(r'class="operation-arguments[^"]*"[^>]*>(.*?)', content, re.DOTALL) if args_section: rows = re.findall(r']*>(.*?)', content, re.DOTALL)
# Each example below is stored only when its pattern matched; the captured
# HTML is reduced to text with strip_html before being stored.
if query_example:
op['example_query'] = strip_html(query_example.group(1))
# Example variables
vars_example = re.search(r'class="[^"]*operation-variables-example[^"]*"[^>]*>.*?]*>(.*?)', content, re.DOTALL)
if vars_example:
op['example_variables'] = strip_html(vars_example.group(1))
# Example response
resp_example = re.search(r'class="[^"]*operation-response-example[^"]*"[^>]*>.*?]*>(.*?)', content, re.DOTALL)
if resp_example:
op['example_response'] = strip_html(resp_example.group(1))
operations.append(op)
# Count stats
# Operations are bucketed by the exact value of their 'type' key.
queries = [op for op in operations if op['type'] == 'query']
mutations = [op for op in operations if op['type'] == 'mutation']
subscriptions = [op for op in operations if op['type'] == 'subscription']
print(f"\nResults:")
print(f" Queries: {len(queries)}")
print(f" Mutations: {len(mutations)}")
print(f" Subscriptions: {len(subscriptions)}")
# Coverage counts: an operation is counted when the field is truthy
# (non-empty); these lookups assume every op dict carries all four keys.
with_desc = sum(1 for op in operations if op['description'])
with_args = sum(1 for op in operations if op['arguments'])
with_resp = sum(1 for op in operations if op['response_type'])
with_example = sum(1 for op in operations if op['example_query'])
print(f" With description: {with_desc}/{len(operations)}")
print(f" With arguments: {with_args}/{len(operations)}")
print(f" With response type: {with_resp}/{len(operations)}")
print(f" With example query: {with_example}/{len(operations)}")
# Save
# The output file is opened without an explicit encoding, so the platform
# default applies; the list is wrapped under a single 'operations' key.
with open('/Users/daveporter/Desktop/CODING-2024/DALIM-API/docs/api_parsed.json', 'w') as f:
json.dump({'operations': operations}, f, indent=2)
print("\nSaved to docs/api_parsed.json")
# Print a few samples
# Only the first operation with each listed name is shown; names with no
# match are skipped silently.
for name in ['activities', 'createProject', 'createUser', 'search', 'assets']:
matches = [op for op in operations if op['name'] == name]
if matches:
op = matches[0]
print(f"\n--- {op['type']} {op['name']} ---")
# Description is truncated to its first 100 characters for display.
print(f" Desc: {op['description'][:100]}")
print(f" Response: {op['response_type']}")
print(f" Args ({len(op['arguments'])}):")
# Show at most three arguments, each description cut to 60 characters.
for a in op['arguments'][:3]:
print(f" {a['name']}: {a['type']} {'(required)' if a['required'] else ''} - {a['description'][:60]}")
if len(op['arguments']) > 3:
print(f" ... and {len(op['arguments'])-3} more")
# Run the parser only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()