Coding - Checking for copyright notice (#749)

Created a new CI step to validate the presence of a copyright notice in each source file.
Updated existed files to include the copyright notice.
This commit is contained in:
Pasukhin Dmitry
2025-10-16 18:39:56 +01:00
committed by GitHub
parent 184fb9bf56
commit f4741d647f
62 changed files with 1144 additions and 12 deletions

373
.github/actions/scripts/validate-license.py vendored Executable file
View File

@@ -0,0 +1,373 @@
#!/usr/bin/env python3
"""
Script to validate and add OCCT license headers to C++ source files.
Validates that files contain the Open CASCADE Technology license header.
Automatically adds missing headers with the current year.
Processes: .cxx, .hxx, .pxx, .lxx files
"""
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Tuple, Optional
# The canonical license text (normalized for comparison)
CANONICAL_LICENSE_LINES = [
"This file is part of Open CASCADE Technology software library.",
"",
"This library is free software; you can redistribute it and/or modify it under",
"the terms of the GNU Lesser General Public License version 2.1 as published",
"by the Free Software Foundation, with special exception defined in the file",
"OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT",
"distribution for complete text of the license and disclaimer of any warranty.",
"",
"Alternatively, this file may be used under the terms of Open CASCADE",
"commercial license or contractual agreement.",
]
def normalize_text(text: str) -> str:
"""Normalize text for comparison by removing extra whitespace and converting to lowercase."""
# Remove multiple spaces, convert to lowercase, strip whitespace
return ' '.join(text.lower().split())
def generate_license_header(year: Optional[int] = None) -> str:
"""
Generate the license header with C++ style comments.
Args:
year: Optional year to use. If None, uses current year.
Returns:
Complete license header as string
"""
if year is None:
year = datetime.now().year
header_lines = [
f"// Copyright (c) {year} OPEN CASCADE SAS",
"//",
"// This file is part of Open CASCADE Technology software library.",
"//",
"// This library is free software; you can redistribute it and/or modify it under",
"// the terms of the GNU Lesser General Public License version 2.1 as published",
"// by the Free Software Foundation, with special exception defined in the file",
"// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT",
"// distribution for complete text of the license and disclaimer of any warranty.",
"//",
"// Alternatively, this file may be used under the terms of Open CASCADE",
"// commercial license or contractual agreement.",
"" # Empty line after header
]
return '\n'.join(header_lines) + '\n'
def extract_license_text(lines: List[str], start_idx: int, max_lines: int = 20) -> Tuple[List[str], int]:
"""
Extract potential license text from comment block starting at start_idx.
Returns:
Tuple of (extracted_text_lines, end_index)
"""
extracted = []
current_idx = start_idx
in_block_comment = False
while current_idx < len(lines) and current_idx < start_idx + max_lines:
line = lines[current_idx].rstrip()
# Check for start of block comment
if line.strip() == '/*' or line.strip().startswith('/*'):
in_block_comment = True
# Extract text after /* if any
text_after = line.strip()[2:].strip()
if text_after and not text_after.startswith('*'):
extracted.append(text_after)
current_idx += 1
continue
# Check for end of block comment
if '*/' in line:
in_block_comment = False
# Extract text before */ if any
text_before = line.split('*/')[0].strip().lstrip('*').strip()
if text_before:
extracted.append(text_before)
current_idx += 1
break
# Inside block comment
if in_block_comment:
# Remove leading spaces and asterisks
text_content = line.strip().lstrip('*').strip()
extracted.append(text_content)
current_idx += 1
continue
# Check for // style comment
if line.strip().startswith('//'):
text_content = line.strip()[2:].strip()
extracted.append(text_content)
current_idx += 1
continue
# If we're not in a comment and line is not a comment, stop
if line.strip() and not in_block_comment:
break
current_idx += 1
return extracted, current_idx
def check_license_header(filepath: str) -> Tuple[bool, Optional[int], int]:
"""
Check if file has a valid license header.
Returns:
Tuple of (has_valid_license, line_where_found, header_end_line)
If no valid license found, returns (False, None, 0)
"""
try:
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
lines = f.readlines()
except Exception as e:
print(f"Error reading {filepath}: {e}")
return False, None, 0
if not lines:
return False, None, 0
# Check first 30 lines for license header
for start_line in range(min(30, len(lines))):
extracted, end_idx = extract_license_text(lines, start_line, max_lines=25)
if len(extracted) < 5: # Too short to be a license
continue
# Compare normalized text
matched_lines = 0
for canonical_line in CANONICAL_LICENSE_LINES:
normalized_canonical = normalize_text(canonical_line)
# Skip empty lines in comparison
if not normalized_canonical:
continue
# Check if any extracted line matches this canonical line
for extracted_line in extracted:
normalized_extracted = normalize_text(extracted_line)
if normalized_canonical in normalized_extracted or normalized_extracted in normalized_canonical:
matched_lines += 1
break
# If we matched most of the key lines (allowing some variation)
if matched_lines >= 7: # At least 7 out of ~9 non-empty lines
return True, start_line, end_idx
return False, None, 0
def add_license_header(filepath: str, dry_run: bool = False) -> bool:
"""
Add license header to file if missing.
Returns:
True if file was modified, False otherwise
"""
try:
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
except Exception as e:
print(f"Error reading {filepath}: {e}")
return False
# Generate header with current year
header = generate_license_header()
# Check if file starts with shebang
new_content = content
if content.startswith('#!'):
# Preserve shebang line
first_newline = content.find('\n')
if first_newline != -1:
shebang = content[:first_newline + 1]
rest = content[first_newline + 1:]
new_content = shebang + '\n' + header + '\n' + rest
else:
new_content = content + '\n' + header
else:
# Add header at the beginning
new_content = header + '\n' + content
if not dry_run:
try:
with open(filepath, 'w', encoding='utf-8') as f:
f.write(new_content)
except Exception as e:
print(f"Error writing {filepath}: {e}")
return False
return True
def find_files(root_dir: str, extensions: List[str]) -> List[str]:
"""Find all files with specified extensions in the directory tree."""
files = []
for ext in extensions:
files.extend(Path(root_dir).rglob(f"*{ext}"))
return [str(f) for f in files]
def process_file(filepath: str, fix: bool = False, dry_run: bool = False) -> Tuple[bool, bool]:
"""
Process a single file to check/fix license header.
Returns:
Tuple of (has_license, was_modified)
"""
has_license, _, _ = check_license_header(filepath)
if has_license:
return True, False
# License is missing
if fix:
modified = add_license_header(filepath, dry_run)
return False, modified
return False, False
def main():
import argparse
parser = argparse.ArgumentParser(
description='Validate and add OCCT license headers to C++ source files'
)
parser.add_argument(
'path',
nargs='?',
default='src',
help='Root directory or file to process (default: src)'
)
parser.add_argument(
'--fix',
action='store_true',
help='Automatically add missing license headers'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Show what would be changed without modifying files (implies --fix)'
)
parser.add_argument(
'--extensions',
nargs='+',
default=['.cxx', '.hxx', '.pxx', '.lxx'],
help='File extensions to process (default: .cxx .hxx .pxx .lxx)'
)
parser.add_argument(
'--files',
nargs='+',
help='Specific files to process (overrides path scanning)'
)
parser.add_argument(
'--ci',
action='store_true',
help='CI mode: exit with error code if any file is missing license'
)
args = parser.parse_args()
# Dry run implies fix mode
if args.dry_run:
args.fix = True
# Get list of files to process
if args.files:
# Process specific files
files = [os.path.abspath(f) for f in args.files if os.path.isfile(f)]
if len(files) == 0:
print("Error: No valid files specified")
return 1
else:
# Process directory or single file
path = os.path.abspath(args.path)
if os.path.isfile(path):
files = [path]
elif os.path.isdir(path):
print(f"Scanning for files in: {path}")
print(f"Extensions: {', '.join(args.extensions)}")
files = find_files(path, args.extensions)
print(f"Found {len(files)} files to process\n")
else:
print(f"Error: {path} is not a valid file or directory")
return 1
if args.dry_run:
print("DRY RUN MODE - No files will be modified\n")
# Process files
missing_license = []
fixed_files = []
for filepath in sorted(files):
has_license, was_modified = process_file(filepath, args.fix, args.dry_run)
if not has_license and not was_modified:
missing_license.append(filepath)
elif was_modified:
fixed_files.append(filepath)
# Print results
if missing_license:
print(f"\n{'='*70}")
print(f"FILES MISSING LICENSE HEADER ({len(missing_license)})")
print(f"{'='*70}")
for filepath in missing_license:
print(f" {filepath}")
if fixed_files:
print(f"\n{'='*70}")
print(f"FILES {'THAT WOULD BE ' if args.dry_run else ''}FIXED ({len(fixed_files)})")
print(f"{'='*70}")
for filepath in fixed_files:
print(f" {filepath}")
# Summary
print(f"\n{'='*70}")
print("SUMMARY")
print(f"{'='*70}")
print(f"Files processed: {len(files)}")
print(f"Files with valid license: {len(files) - len(missing_license) - len(fixed_files)}")
print(f"Files missing license: {len(missing_license)}")
if args.fix:
print(f"Files {'that would be ' if args.dry_run else ''}fixed: {len(fixed_files)}")
if args.dry_run:
print("\nThis was a dry run. Use --fix without --dry-run to apply changes.")
# Exit with error in CI mode if any files are missing license
if args.ci and (missing_license or (fixed_files and not args.fix)):
print("\n[CI MODE] License validation FAILED")
return 1
if not args.fix and missing_license:
print(f"\nUse --fix to automatically add license headers to files missing them.")
return 1
return 0
if __name__ == '__main__':
exit(main())