#!/usr/bin/env python3
# Copyright (c) 2025 OPEN CASCADE SAS
#
# This file is part of Open CASCADE Technology software library.
#
# This library is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License version 2.1 as published
# by the Free Software Foundation, with special exception defined in the file
# OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
# distribution for complete text of the license and disclaimer of any warranty.
#
# Alternatively, this file may be used under the terms of Open CASCADE
# commercial license or contractual agreement.

"""
Script to validate and add OCCT license headers to C++ source files.

Validates that files contain the Open CASCADE Technology license header.
Automatically adds missing headers with the current year.

Processes: .cxx, .hxx, .pxx, .lxx files
"""

import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Tuple, Optional

# The canonical license text (normalized for comparison)
CANONICAL_LICENSE_LINES = [
    "This file is part of Open CASCADE Technology software library.",
    "",
    "This library is free software; you can redistribute it and/or modify it under",
    "the terms of the GNU Lesser General Public License version 2.1 as published",
    "by the Free Software Foundation, with special exception defined in the file",
    "OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT",
    "distribution for complete text of the license and disclaimer of any warranty.",
    "",
    "Alternatively, this file may be used under the terms of Open CASCADE",
    "commercial license or contractual agreement.",
]

# Number of leading lines of a file that may start the license comment block.
_MAX_HEADER_START_LINE = 30

# Minimum number of extracted comment lines for a block to be considered.
_MIN_EXTRACTED_LINES = 5

# Minimum number of canonical lines that must be found (out of 9 non-empty).
_MIN_MATCHED_LINES = 7

# UTF-8 byte order mark; must stay the very first bytes of a file.
_UTF8_BOM = b'\xef\xbb\xbf'


def normalize_text(text: str) -> str:
    """Normalize text for comparison.

    Collapses every run of whitespace to a single space, strips the ends and
    converts the result to lowercase.
    """
    return ' '.join(text.lower().split())


# Non-empty canonical lines, normalized once instead of on every comparison.
_NORMALIZED_CANONICAL_LINES = [
    normalized
    for normalized in map(normalize_text, CANONICAL_LICENSE_LINES)
    if normalized
]


def generate_license_header(year: Optional[int] = None) -> str:
    """
    Generate the license header with C++ style comments.

    Args:
        year: Optional year to use. If None, uses current year.

    Returns:
        Complete license header as string, terminated by one blank line.
    """
    if year is None:
        year = datetime.now().year

    header_lines = [
        f"// Copyright (c) {year} OPEN CASCADE SAS",
        "//",
        "// This file is part of Open CASCADE Technology software library.",
        "//",
        "// This library is free software; you can redistribute it and/or modify it under",
        "// the terms of the GNU Lesser General Public License version 2.1 as published",
        "// by the Free Software Foundation, with special exception defined in the file",
        "// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT",
        "// distribution for complete text of the license and disclaimer of any warranty.",
        "//",
        "// Alternatively, this file may be used under the terms of Open CASCADE",
        "// commercial license or contractual agreement.",
        "",  # Empty line after header
    ]

    return '\n'.join(header_lines) + '\n'


def extract_license_text(lines: List[str], start_idx: int, max_lines: int = 20) -> Tuple[List[str], int]:
    """
    Extract potential license text from comment block starting at start_idx.

    Both ``//`` line comments and ``/* ... */`` block comments are understood.
    Comment markers and decorative leading asterisks are removed from the
    returned text; blank comment lines are returned as empty strings.

    Args:
        lines: File content split into lines.
        start_idx: Index of the first line to inspect.
        max_lines: Maximum number of lines to inspect from start_idx.

    Returns:
        Tuple of (extracted_text_lines, end_index), where end_index is the
        index of the first line that was not consumed.
    """
    extracted = []
    current_idx = start_idx
    in_block_comment = False
    limit = min(len(lines), start_idx + max_lines)

    while current_idx < limit:
        line = lines[current_idx].rstrip()
        stripped = line.strip()

        # Start of a block comment
        if stripped.startswith('/*'):
            remainder = stripped[2:]
            if '*/' in remainder:
                # Block comment opened and closed on the same line: treat it
                # like a single comment line so that the following lines are
                # not mistaken for the inside of a block comment.
                text_inside = remainder.split('*/')[0].strip().lstrip('*').strip()
                if text_inside:
                    extracted.append(text_inside)
                current_idx += 1
                continue

            in_block_comment = True
            # Keep text after "/*" unless it is only decoration ("/****")
            text_after = remainder.strip()
            if text_after and not text_after.startswith('*'):
                extracted.append(text_after)
            current_idx += 1
            continue

        # End of a block comment: keep the text before "*/" and stop
        if '*/' in line:
            in_block_comment = False
            text_before = line.split('*/')[0].strip().lstrip('*').strip()
            if text_before:
                extracted.append(text_before)
            current_idx += 1
            break

        # Inside a block comment: drop leading spaces and asterisks
        if in_block_comment:
            extracted.append(stripped.lstrip('*').strip())
            current_idx += 1
            continue

        # "//" style comment
        if stripped.startswith('//'):
            extracted.append(stripped[2:].strip())
            current_idx += 1
            continue

        # A non-empty line outside of any comment ends the block
        if stripped:
            break

        # Blank line outside a comment: skip it
        current_idx += 1

    return extracted, current_idx


def _count_canonical_matches(extracted: List[str]) -> int:
    """Count canonical license lines that appear in the extracted comment text.

    A canonical line matches when it contains, or is contained in, one of the
    extracted lines (after normalization), which tolerates re-wrapped text.
    Empty extracted lines are ignored: the empty string is a substring of
    every string and would otherwise match every canonical line.
    """
    candidates = [text for text in map(normalize_text, extracted) if text]
    return sum(
        1
        for canonical in _NORMALIZED_CANONICAL_LINES
        if any(canonical in candidate or candidate in canonical for candidate in candidates)
    )


def check_license_header(filepath: str) -> Tuple[bool, Optional[int], int]:
    """
    Check if file has a valid license header.

    Every one of the first 30 lines is tried as the start of a comment block;
    the block is accepted when at least 7 of the non-empty canonical license
    lines are found in it.

    Args:
        filepath: Path of the file to check.

    Returns:
        Tuple of (has_valid_license, line_where_found, header_end_line)
        If no valid license found (or the file cannot be read), returns
        (False, None, 0)
    """
    try:
        # errors='ignore': the license text is ASCII, so undecodable bytes
        # elsewhere in the file are irrelevant for detection.
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except (OSError, ValueError) as e:
        print(f"Error reading {filepath}: {e}")
        return False, None, 0

    if not lines:
        return False, None, 0

    for start_line in range(min(_MAX_HEADER_START_LINE, len(lines))):
        extracted, end_idx = extract_license_text(lines, start_line, max_lines=25)

        if len(extracted) < _MIN_EXTRACTED_LINES:  # Too short to be a license
            continue

        # Allow some variation: most, not all, key lines must be present
        if _count_canonical_matches(extracted) >= _MIN_MATCHED_LINES:
            return True, start_line, end_idx

    return False, None, 0


def add_license_header(filepath: str, dry_run: bool = False) -> bool:
    """
    Add license header to file if missing.

    The file is processed as raw bytes so that its existing content is kept
    byte-for-byte: non-UTF-8 characters are not dropped and line endings are
    not converted. The inserted header uses CRLF when the file already
    contains CRLF line endings, LF otherwise. A leading UTF-8 BOM stays at the
    start of the file and a leading shebang line stays the first line.

    Args:
        filepath: Path of the file to modify.
        dry_run: When True, nothing is written.

    Returns:
        True if file was modified (or would be, in dry-run mode),
        False if it could not be read or written.
    """
    try:
        with open(filepath, 'rb') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        print(f"Error reading {filepath}: {e}")
        return False

    # Match the newline convention already used by the file
    newline = b'\r\n' if b'\r\n' in content else b'\n'

    # Generate header with current year
    header = generate_license_header().encode('utf-8').replace(b'\n', newline)

    # Detach the BOM so the header is not inserted in front of it
    bom = b''
    if content.startswith(_UTF8_BOM):
        bom = _UTF8_BOM
        content = content[len(_UTF8_BOM):]

    if content.startswith(b'#!'):
        # Preserve shebang line
        first_newline = content.find(b'\n')
        if first_newline != -1:
            shebang = content[:first_newline + 1]
            rest = content[first_newline + 1:]
            new_content = shebang + newline + header + newline + rest
        else:
            new_content = content + newline + header
    else:
        # Add header at the beginning
        new_content = header + newline + content

    if not dry_run:
        try:
            with open(filepath, 'wb') as f:
                f.write(bom + new_content)
        except OSError as e:
            print(f"Error writing {filepath}: {e}")
            return False

    return True


def find_files(root_dir: str, extensions: List[str]) -> List[str]:
    """Find all files with specified extensions in the directory tree.

    Only regular files are returned (a directory whose name ends with one of
    the extensions is skipped), each file is listed once even when several
    extensions match it, and the result is sorted.
    """
    root = Path(root_dir)
    found = set()
    for ext in extensions:
        for candidate in root.rglob(f"*{ext}"):
            if candidate.is_file():
                found.add(str(candidate))
    return sorted(found)


def process_file(filepath: str, fix: bool = False, dry_run: bool = False) -> Tuple[bool, bool]:
    """
    Process a single file to check/fix license header.

    Args:
        filepath: Path of the file to process.
        fix: When True, a missing header is added.
        dry_run: Passed to add_license_header; when True nothing is written.

    Returns:
        Tuple of (has_license, was_modified)
    """
    has_license, _, _ = check_license_header(filepath)

    if has_license:
        return True, False

    # License is missing
    if fix:
        return False, add_license_header(filepath, dry_run)

    return False, False


def main():
    """Command-line entry point.

    Returns:
        Process exit code: 0 on success, 1 on invalid input, when licenses are
        missing and --fix was not given, or when --ci is given and files are
        still missing a license after the run (including a dry run).
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Validate and add OCCT license headers to C++ source files'
    )
    parser.add_argument(
        'path',
        nargs='?',
        default='src',
        help='Root directory or file to process (default: src)'
    )
    parser.add_argument(
        '--fix',
        action='store_true',
        help='Automatically add missing license headers'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be changed without modifying files (implies --fix)'
    )
    parser.add_argument(
        '--extensions',
        nargs='+',
        default=['.cxx', '.hxx', '.pxx', '.lxx'],
        help='File extensions to process (default: .cxx .hxx .pxx .lxx)'
    )
    parser.add_argument(
        '--files',
        nargs='+',
        help='Specific files to process (overrides path scanning)'
    )
    parser.add_argument(
        '--file-list',
        help='Path to a file containing list of files to process (one per line)'
    )
    parser.add_argument(
        '--ci',
        action='store_true',
        help='CI mode: exit with error code if any file is missing license'
    )

    args = parser.parse_args()

    # Dry run implies fix mode
    if args.dry_run:
        args.fix = True

    # Get list of files to process
    if args.file_list:
        # Read files from a list file
        try:
            with open(args.file_list, 'r', encoding='utf-8') as f:
                file_paths = [line.strip() for line in f if line.strip()]
        except (OSError, UnicodeDecodeError) as e:
            print(f"Error reading file list: {e}")
            return 1

        files = [os.path.abspath(f) for f in file_paths if os.path.isfile(f)]
        if not files:
            print("No valid files found in file list")
            return 0
    elif args.files:
        # Process specific files
        files = [os.path.abspath(f) for f in args.files if os.path.isfile(f)]
        if not files:
            print("Error: No valid files specified")
            return 1
    else:
        # Process directory or single file
        path = os.path.abspath(args.path)

        if os.path.isfile(path):
            files = [path]
        elif os.path.isdir(path):
            print(f"Scanning for files in: {path}")
            print(f"Extensions: {', '.join(args.extensions)}")
            files = find_files(path, args.extensions)
            print(f"Found {len(files)} files to process\n")
        else:
            print(f"Error: {path} is not a valid file or directory")
            return 1

    if args.dry_run:
        print("DRY RUN MODE - No files will be modified\n")

    # Process files
    missing_license = []
    fixed_files = []

    for filepath in sorted(files):
        has_license, was_modified = process_file(filepath, args.fix, args.dry_run)

        if not has_license and not was_modified:
            missing_license.append(filepath)
        elif was_modified:
            fixed_files.append(filepath)

    separator = '=' * 70

    # Print results
    if missing_license:
        print(f"\n{separator}")
        print(f"FILES MISSING LICENSE HEADER ({len(missing_license)})")
        print(separator)
        for filepath in missing_license:
            print(f"  {filepath}")

    if fixed_files:
        print(f"\n{separator}")
        print(f"FILES {'THAT WOULD BE ' if args.dry_run else ''}FIXED ({len(fixed_files)})")
        print(separator)
        for filepath in fixed_files:
            print(f"  {filepath}")

    # Summary
    print(f"\n{separator}")
    print("SUMMARY")
    print(separator)
    print(f"Files processed: {len(files)}")
    print(f"Files with valid license: {len(files) - len(missing_license) - len(fixed_files)}")
    print(f"Files missing license: {len(missing_license)}")
    if args.fix:
        print(f"Files {'that would be ' if args.dry_run else ''}fixed: {len(fixed_files)}")

    if args.dry_run:
        print("\nThis was a dry run. Use --fix without --dry-run to apply changes.")

    # Exit with error in CI mode if any files are missing license. Files that
    # a dry run only "would" fix are still missing it on disk, so they count.
    if args.ci and (missing_license or (fixed_files and args.dry_run)):
        print("\n[CI MODE] License validation FAILED")
        return 1

    if not args.fix and missing_license:
        print("\nUse --fix to automatically add license headers to files missing them.")
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())