mirror of
https://github.com/Open-Cascade-SAS/OCCT.git
synced 2026-05-10 17:40:24 +08:00
Created a new CI step to validate the presence of a copyright notice in each source file. Updated existing files to include the copyright notice.
374 lines
12 KiB
Python
Executable File
374 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Script to validate and add OCCT license headers to C++ source files.
|
|
|
|
Validates that files contain the Open CASCADE Technology license header.
|
|
Automatically adds missing headers with the current year.
|
|
|
|
Processes: .cxx, .hxx, .pxx, .lxx files
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import List, Tuple, Optional
|
|
|
|
|
|
# Reference wording of the OCCT license body, one entry per line and without
# any comment markers. Empty entries stand for the blank separator lines of the
# header; text is normalized before it is compared against these lines.
CANONICAL_LICENSE_LINES: List[str] = [
    "This file is part of Open CASCADE Technology software library.",
    "",
    "This library is free software; you can redistribute it and/or modify it under",
    "the terms of the GNU Lesser General Public License version 2.1 as published",
    "by the Free Software Foundation, with special exception defined in the file",
    "OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT",
    "distribution for complete text of the license and disclaimer of any warranty.",
    "",
    "Alternatively, this file may be used under the terms of Open CASCADE",
    "commercial license or contractual agreement.",
]
|
|
|
|
|
|
def normalize_text(text: str) -> str:
    """
    Return a canonical form of text for loose comparison.

    The text is lowercased, leading and trailing whitespace is dropped, and
    every internal run of whitespace is collapsed to a single space.
    """
    words = text.lower().split()
    return ' '.join(words)
|
|
|
|
|
|
def generate_license_header(year: Optional[int] = None) -> str:
    """
    Build the OCCT license header as a block of C++ ``//`` comments.

    Args:
        year: Copyright year to embed. When None, the year reported by
            ``datetime.now()`` is used.

    Returns:
        The header text: twelve comment lines followed by one blank line,
        each terminated by a newline.
    """
    year = datetime.now().year if year is None else year

    comment_lines = (
        f"// Copyright (c) {year} OPEN CASCADE SAS",
        "//",
        "// This file is part of Open CASCADE Technology software library.",
        "//",
        "// This library is free software; you can redistribute it and/or modify it under",
        "// the terms of the GNU Lesser General Public License version 2.1 as published",
        "// by the Free Software Foundation, with special exception defined in the file",
        "// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT",
        "// distribution for complete text of the license and disclaimer of any warranty.",
        "//",
        "// Alternatively, this file may be used under the terms of Open CASCADE",
        "// commercial license or contractual agreement.",
    )

    # Terminate every comment line, then append the blank separator line.
    return ''.join(f"{line}\n" for line in comment_lines) + '\n'
|
|
|
|
|
|
def extract_license_text(lines: List[str], start_idx: int, max_lines: int = 20) -> Tuple[List[str], int]:
    """
    Extract potential license text from the comment block starting at start_idx.

    At most max_lines lines are examined. ``//`` comments contribute their
    text with the marker removed; ``/* ... */`` block comments contribute each
    line with leading asterisks removed. Blank lines outside a block comment
    are skipped. Scanning stops after the line that closes a block comment, at
    the first non-blank line that is not a comment, or when the line budget or
    the end of the input is reached.

    Args:
        lines: File content split into lines (trailing newlines are allowed).
        start_idx: Index of the first line to examine.
        max_lines: Maximum number of lines to examine.

    Returns:
        Tuple of (extracted_text_lines, end_index), where end_index is the
        index of the first line that was not consumed.
    """
    extracted: List[str] = []
    current_idx = start_idx
    in_block_comment = False
    limit = min(len(lines), start_idx + max_lines)

    while current_idx < limit:
        stripped = lines[current_idx].strip()

        # Start of a block comment
        if stripped.startswith('/*'):
            remainder = stripped[2:]

            if '*/' in remainder:
                # The comment opens and closes on the same line. Treat it like
                # any other block end; otherwise the block would stay "open"
                # and the following code lines would be read as comment text.
                text = remainder.split('*/')[0].strip().lstrip('*').strip()
                if text:
                    extracted.append(text)
                current_idx += 1
                break

            in_block_comment = True
            text_after = remainder.strip()
            # Skip decoration such as "/**" or "/*****"
            if text_after and not text_after.startswith('*'):
                extracted.append(text_after)
            current_idx += 1
            continue

        # End of a block comment: keep any text in front of the terminator
        if '*/' in stripped:
            text_before = stripped.split('*/')[0].strip().lstrip('*').strip()
            if text_before:
                extracted.append(text_before)
            current_idx += 1
            break

        # Inside a block comment: drop the leading " * " decoration
        if in_block_comment:
            extracted.append(stripped.lstrip('*').strip())
            current_idx += 1
            continue

        # Single-line // comment
        if stripped.startswith('//'):
            extracted.append(stripped[2:].strip())
            current_idx += 1
            continue

        # Non-blank, non-comment line: the comment block is over
        if stripped:
            break

        # Blank line outside a block comment: skip it
        current_idx += 1

    return extracted, current_idx
|
|
|
|
|
|
def check_license_header(filepath: str) -> Tuple[bool, Optional[int], int]:
    """
    Check if file has a valid license header.

    A comment block is searched for starting at each of the first 30 lines of
    the file. A block counts as a valid license when at least 7 of the 8
    non-empty canonical license lines are found in it. A canonical line is
    "found" when, after normalization, it contains or is contained in one of
    the non-empty lines of the block.

    Args:
        filepath: Path of the file to inspect. Undecodable bytes are ignored.

    Returns:
        Tuple of (has_valid_license, line_where_found, header_end_line).
        If no valid license is found, or the file is empty or cannot be read,
        returns (False, None, 0). A read error is also reported on stdout.
    """
    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except Exception as e:
        print(f"Error reading {filepath}: {e}")
        return False, None, 0

    if not lines:
        return False, None, 0

    # Normalize the reference text once; blank separator lines carry no
    # information and are left out of the comparison.
    canonical_lines = [
        normalized
        for normalized in map(normalize_text, CANONICAL_LICENSE_LINES)
        if normalized
    ]

    # Check first 30 lines for license header
    for start_line in range(min(30, len(lines))):
        extracted, end_idx = extract_license_text(lines, start_line, max_lines=25)

        if len(extracted) < 5:  # Too short to be a license
            continue

        # Blank comment lines normalize to '' and '' is a substring of every
        # string, so they must be discarded: otherwise a single empty "//"
        # line would "match" every canonical line and any comment block
        # would be accepted as a license.
        candidates = [
            normalized
            for normalized in map(normalize_text, extracted)
            if normalized
        ]

        matched_lines = sum(
            1
            for canonical in canonical_lines
            if any(canonical in text or text in canonical for text in candidates)
        )

        # Allow some variation: at least 7 of the 8 non-empty lines
        if matched_lines >= 7:
            return True, start_line, end_idx

    return False, None, 0
|
|
|
|
|
|
def add_license_header(filepath: str, dry_run: bool = False) -> bool:
    """
    Add a license header (with the current year) to the file.

    The header is inserted unconditionally; callers are expected to have
    checked that it is missing. The file is handled as raw bytes so that the
    rest of its content is written back unchanged: bytes that are not valid
    UTF-8 are kept, the existing line-ending style (CRLF or LF) is used for
    the inserted header, and a leading UTF-8 byte order mark stays at the very
    start of the file. A ``#!`` first line is kept in front of the header.

    Args:
        filepath: Path of the file to modify.
        dry_run: When True, the new content is computed but nothing is written.

    Returns:
        True if the file was modified (or would be, in dry-run mode), False
        if it could not be read or written. Errors are reported on stdout.
    """
    try:
        with open(filepath, 'rb') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading {filepath}: {e}")
        return False

    # Keep a UTF-8 byte order mark as the first bytes of the file
    bom = b'\xef\xbb\xbf'
    prefix = b''
    if content.startswith(bom):
        prefix = bom
        content = content[len(bom):]

    # Follow the file's own line-ending convention for the inserted lines
    newline = b'\r\n' if b'\r\n' in content else b'\n'

    # Generate header with current year
    header = generate_license_header().encode('utf-8').replace(b'\n', newline)

    if content.startswith(b'#!'):
        # Preserve shebang line
        shebang, line_end, rest = content.partition(b'\n')
        if line_end:
            new_content = shebang + line_end + newline + header + newline + rest
        else:
            # The file consists of the shebang only
            new_content = content + newline + header
    else:
        # Add header at the beginning
        new_content = header + newline + content

    if not dry_run:
        try:
            with open(filepath, 'wb') as f:
                f.write(prefix + new_content)
        except Exception as e:
            print(f"Error writing {filepath}: {e}")
            return False

    return True
|
|
|
|
|
|
def find_files(root_dir: str, extensions: List[str]) -> List[str]:
    """
    Find all files with specified extensions in the directory tree.

    Args:
        root_dir: Directory to search recursively.
        extensions: File name suffixes to match, e.g. ``['.cxx', '.hxx']``.

    Returns:
        Paths of the matching regular files as strings, grouped by extension
        in the order given. Directories whose name happens to end with one of
        the extensions are skipped, and a file matched by more than one
        extension is listed once.
    """
    root = Path(root_dir)
    matches = (
        str(candidate)
        for ext in extensions
        for candidate in root.rglob(f"*{ext}")
        if candidate.is_file()
    )
    # dict.fromkeys removes duplicates while keeping first-seen order
    return list(dict.fromkeys(matches))
|
|
|
|
|
|
def process_file(filepath: str, fix: bool = False, dry_run: bool = False) -> Tuple[bool, bool]:
    """
    Check one file for the license header and optionally add it.

    Args:
        filepath: Path of the file to check.
        fix: When True, a missing header is added via add_license_header.
        dry_run: Passed through to add_license_header.

    Returns:
        Tuple of (has_license, was_modified). has_license reflects the state
        found by the check, so it stays False for a file that gets fixed.
    """
    license_present = check_license_header(filepath)[0]

    if license_present:
        return True, False

    if not fix:
        # Header is missing and we were only asked to report it
        return False, False

    return False, add_license_header(filepath, dry_run)
|
|
|
|
|
|
def _print_file_list(title: str, paths: List[str]) -> None:
    """Print a banner carrying title, followed by one path per line."""
    print(f"\n{'='*70}")
    print(title)
    print(f"{'='*70}")
    for filepath in paths:
        print(f" {filepath}")


def main() -> int:
    """
    Command-line entry point: validate (and optionally add) license headers.

    Parses the command line, collects the files to check (explicit --files,
    a single file, or a directory scanned for the given extensions), processes
    each file and prints a report.

    Returns:
        Process exit status: 1 when the input path or file list is invalid,
        when files without a license remain and --fix was not given, or when
        --ci is set and files are still unlicensed (including files that a
        dry run would have fixed); 0 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Validate and add OCCT license headers to C++ source files'
    )
    parser.add_argument(
        'path',
        nargs='?',
        default='src',
        help='Root directory or file to process (default: src)'
    )
    parser.add_argument(
        '--fix',
        action='store_true',
        help='Automatically add missing license headers'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be changed without modifying files (implies --fix)'
    )
    parser.add_argument(
        '--extensions',
        nargs='+',
        default=['.cxx', '.hxx', '.pxx', '.lxx'],
        help='File extensions to process (default: .cxx .hxx .pxx .lxx)'
    )
    parser.add_argument(
        '--files',
        nargs='+',
        help='Specific files to process (overrides path scanning)'
    )
    parser.add_argument(
        '--ci',
        action='store_true',
        help='CI mode: exit with error code if any file is missing license'
    )

    args = parser.parse_args()

    # Dry run implies fix mode
    if args.dry_run:
        args.fix = True

    # Get list of files to process
    if args.files:
        # Process specific files; say which entries are ignored instead of
        # dropping them silently
        files = []
        for candidate in args.files:
            if os.path.isfile(candidate):
                files.append(os.path.abspath(candidate))
            else:
                print(f"Warning: skipping {candidate}: not a file")
        if not files:
            print("Error: No valid files specified")
            return 1
    else:
        # Process directory or single file
        path = os.path.abspath(args.path)

        if os.path.isfile(path):
            files = [path]
        elif os.path.isdir(path):
            print(f"Scanning for files in: {path}")
            print(f"Extensions: {', '.join(args.extensions)}")
            files = find_files(path, args.extensions)
            print(f"Found {len(files)} files to process\n")
        else:
            print(f"Error: {path} is not a valid file or directory")
            return 1

    if args.dry_run:
        print("DRY RUN MODE - No files will be modified\n")

    # Process files
    missing_license = []
    fixed_files = []

    for filepath in sorted(files):
        has_license, was_modified = process_file(filepath, args.fix, args.dry_run)

        if was_modified:
            fixed_files.append(filepath)
        elif not has_license:
            missing_license.append(filepath)

    # Print results
    if missing_license:
        _print_file_list(
            f"FILES MISSING LICENSE HEADER ({len(missing_license)})",
            missing_license,
        )

    if fixed_files:
        _print_file_list(
            f"FILES {'THAT WOULD BE ' if args.dry_run else ''}FIXED ({len(fixed_files)})",
            fixed_files,
        )

    # Summary
    print(f"\n{'='*70}")
    print("SUMMARY")
    print(f"{'='*70}")
    print(f"Files processed: {len(files)}")
    print(f"Files with valid license: {len(files) - len(missing_license) - len(fixed_files)}")
    print(f"Files missing license: {len(missing_license)}")
    if args.fix:
        print(f"Files {'that would be ' if args.dry_run else ''}fixed: {len(fixed_files)}")

    if args.dry_run:
        print("\nThis was a dry run. Use --fix without --dry-run to apply changes.")

    # Exit with error in CI mode if any files are missing license. Files that
    # a dry run "would fix" are still unlicensed on disk, so they fail the
    # check as well.
    if args.ci and (missing_license or (fixed_files and args.dry_run)):
        print("\n[CI MODE] License validation FAILED")
        return 1

    if not args.fix and missing_license:
        print("\nUse --fix to automatically add license headers to files missing them.")
        return 1

    return 0
|
|
|
|
|
|
# Run as a script. sys.exit is used rather than the bare exit() helper, which
# is injected by the site module for interactive use and is not guaranteed to
# exist (for example under "python -S").
if __name__ == '__main__':
    sys.exit(main())
|