Refactored it a bit
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1,6 +1,5 @@
|
||||
.venv
|
||||
build
|
||||
hulud_check.egg-info
|
||||
*.egg-info
|
||||
patternfile
|
||||
|
||||
*.egg-info
|
||||
|
||||
|
||||
@@ -1,20 +1,10 @@
|
||||
FROM python:3.13-slim-trixie
|
||||
|
||||
WORKDIR /opt/hulud_check
|
||||
WORKDIR /opt/glchecka
|
||||
RUN apt-get update && apt-get install -y ripgrep git curl
|
||||
ADD check_gitlab.py .
|
||||
ADD pyproject.toml .
|
||||
ADD entrypoint.sh .
|
||||
RUN pip3 install .
|
||||
|
||||
COPY --chmod=755 <<EOF /opt/hulud_check/entrypoint.sh
|
||||
#!/bin/bash
|
||||
echo "Get most recent defintions"
|
||||
curl -s https://raw.githubusercontent.com/wiz-sec-public/wiz-research-iocs/refs/heads/main/reports/shai-hulud-2-packages.csv > sha1-hulud-2-packages.csv
|
||||
echo "Format patterns"
|
||||
tail -n +2 sha1-hulud-2-packages.csv | awk -F ',' '{print \$1}' > patternfile
|
||||
echo "Running check"
|
||||
python3 -u check_gitlab.py
|
||||
EOF
|
||||
|
||||
ENTRYPOINT ["/opt/hulud_check/entrypoint.sh"]
|
||||
ENTRYPOINT ["python3", "-u", "check_gitlab.py"]
|
||||
|
||||
42
README.md
Normal file
42
README.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Gitlab Repository Checker
|
||||
|
||||
This tool acts as a small helper to find specific strings in files (e.g. axios 1.4.1 in package.json).
|
||||
It queries the GitLab API for groups and projects, clones the repositories locally via HTTPS, and searches them with ripgrep.
|
||||
|
||||
In the end a CSV report will be generated with the findings.
|
||||
|
||||
It can also be used for any other kind of automated repository search.
|
||||
|
||||
## Configuration
|
||||
|
||||
You can configure the tool via environment variables:
|
||||
|
||||
```shell
|
||||
|
||||
GITLAB_URL # Required - Defines the Gitlab URL (for Self Hosted instances or Managed)
|
||||
GITLAB_PAT # Required - A GitLab personal access token with permission to read groups and projects
|
||||
GITLAB_GROUP_ID # Optional - The ID of a specific group; by default all groups are checked for repositories
|
||||
GLOB_FILE # Optional - A file containing glob patterns or filenames that limit which files are searched (applied recursively in all subdirectories) - Default - globfile
|
||||
GIT_TMP_ROOT # Optional - The path where repositories are cloned to for scanning - Default - /tmp/repo_check
|
||||
REPORT_PATH # Optional - The path where reports are stored - Default - /tmp/check_reports
|
||||
REPORT_FILE # Optional - The Filename for the report - Default - report.csv
|
||||
PATTERN_FILE # Optional - The file containing the regex patterns which ripgrep will use to match strings - Default - patternfile
|
||||
|
||||
```
|
||||
|
||||
## Execution
|
||||
|
||||
```shell
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install .
|
||||
python -u check_gitlab.py
|
||||
```
|
||||
|
||||
You can also try to use the Dockerfile
|
||||
|
||||
## Important
|
||||
|
||||
> *This product is 100% AI Free.
|
||||
> **It is not nice code. Made in a hurry and with a low attention span.
|
||||
> ***Product is not vegan. Might contain bugs and nuts.
|
||||
208
check_gitlab.py
208
check_gitlab.py
@@ -1,12 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Very hacky quick check for sha1-hulud for gitlab repos
|
||||
# Need to set the GITLAB_URL and GITLAB_PAT
|
||||
# Also use the CSV provided by https://github.com/wiz-sec-public/wiz-research-iocs/blob/main/reports/shai-hulud-2-packages.csv and create a patternfile
|
||||
# Patternfile creation:
|
||||
# curl https://raw.githubusercontent.com/wiz-sec-public/wiz-research-iocs/refs/heads/main/reports/shai-hulud-2-packages.csv > sha1-hulud-2-packages.csv
|
||||
# tail -n +2 sha1-hulud-2-packages.csv | awk -F ',' '{print $1}' > patternfile
|
||||
|
||||
# pip install GitPython requests
|
||||
# You need to have ripgrep installed too
|
||||
# apt-get install ripgrep
|
||||
@@ -53,59 +46,138 @@ class Report():
|
||||
for row in self.findings:
|
||||
writer.writerow(row)
|
||||
|
||||
def get_all_projects(next_link=None, group_id=None, prev_result=[]):
|
||||
base_path = '/api/v4'
|
||||
url_params = ["include_subgroups=true", "per_page=50", "search_namespaces=true", "owned=false", "order_by=id", "sort=asc"]
|
||||
if group_id:
|
||||
base_path += f"/groups/{group_id}"
|
||||
else:
|
||||
url_params.append("pagination=keyset")
|
||||
class GitlabRepositories():
|
||||
|
||||
def __init__(self):
|
||||
|
||||
if not next_link:
|
||||
result = session.query(f"{base_path}/projects?{'&'.join(url_params)}")
|
||||
else:
|
||||
result = session.get(next_link)
|
||||
if not os.environ.get('GITLAB_URL'):
|
||||
print("Environment variable GITLAB_URL not specified")
|
||||
exit(1)
|
||||
|
||||
if not os.environ.get('GITLAB_PAT'):
|
||||
print("Environment variable GITLAB_PAT not specified")
|
||||
exit(1)
|
||||
|
||||
self.projects = []
|
||||
self.groups = []
|
||||
self.base_path = '/api/v4'
|
||||
self.session = GitlabConnector()
|
||||
|
||||
if os.environ.get('GITLAB_GROUP_ID'):
|
||||
self.groups.append(os.environ.get('GITLAB_GROUP_ID'))
|
||||
|
||||
def parse_pagination(self, result_headers):
|
||||
|
||||
ret_val = False
|
||||
if not result_headers.get('Link'):
|
||||
return ret_val
|
||||
|
||||
links = result_headers['Link'].split(', ')
|
||||
|
||||
if result.headers.get('Link'):
|
||||
links = result.headers['Link'].split(', ')
|
||||
for link in links:
|
||||
parts = link.split('; ')
|
||||
rel = parts[1].split('=')[1]
|
||||
if rel == '"next"':
|
||||
link = parts[0].replace('<', '').replace('>', '')
|
||||
ret_val = parts[0].replace('<', '').replace('>', '')
|
||||
break
|
||||
|
||||
prev_result += [{'id': i['id'], 'http_url_to_repo': i['http_url_to_repo'], 'ssh_url_to_repo': i['ssh_url_to_repo'], 'web_url': i['web_url']} for i in result.json()]
|
||||
return ret_val
|
||||
|
||||
# I know, not nice.. but im in a hurry
|
||||
try:
|
||||
if rel == "\"next\"":
|
||||
get_all_projects(next_link=link, group_id=group_id, prev_result=prev_result)
|
||||
except:
|
||||
pass
|
||||
return prev_result
|
||||
|
||||
def clone_repo_with_http(repo_url=None):
|
||||
repo_host_path = repo_url.split('://')[1]
|
||||
repo_http_scheme = repo_url.split('://')[0]
|
||||
repo_credentials = f"token:{session.pat}"
|
||||
repo_remote = f"{repo_http_scheme}://{repo_credentials}@{repo_host_path}"
|
||||
repo_name = repo_host_path.replace('/', '_').rstrip('.git')
|
||||
repo_path = f"{git_tmp_root}/{repo_name}"
|
||||
def get_groups(self, next_link=None):
    """Append the IDs of all listed groups to ``self.groups``.

    Args:
        next_link: Optional URL of a result page to start from. When it is
            omitted, listing starts at ``{self.base_path}/groups``.

    Each page is fetched through ``self.session`` and the ``next`` link is
    taken from ``self.parse_pagination``. Collection stops when no further
    page is announced, or - after printing a notice - when a request
    yields a falsy result.
    """
    # Iterate instead of recursing once per page, so that a long listing
    # cannot exhaust the interpreter's recursion limit.
    while True:
        if next_link:
            result = self.session.get(next_link)
        else:
            result = self.session.query(f"{self.base_path}/groups")

        if not result:
            print("No groups found or permissions not sufficient.")
            return

        self.groups += [group['id'] for group in result.json()]

        next_link = self.parse_pagination(result.headers)
        if not next_link:
            return
|
||||
|
||||
def get_projects_by_group(self, next_link=None, group_id=None):
    """Append the projects of one group to ``self.projects``.

    Args:
        next_link: Optional URL of a result page to start from. When it is
            omitted, listing starts at the group's ``/projects`` endpoint.
        group_id: ID of the group whose projects are listed.

    For every project only ``id``, ``http_url_to_repo``,
    ``ssh_url_to_repo`` and ``web_url`` are kept. Collection stops when
    ``self.parse_pagination`` reports no further page, or - after printing
    a notice - when a request yields a falsy result.
    """
    url_params = ["include_subgroups=true", "per_page=20", "search_namespaces=true", "owned=false", "order_by=id", "sort=asc"]
    group_path = f"{self.base_path}/groups/{group_id}"
    kept_keys = ('id', 'http_url_to_repo', 'ssh_url_to_repo', 'web_url')

    # Iterate instead of recursing once per page, so that a group with many
    # projects cannot exhaust the interpreter's recursion limit.
    while True:
        if next_link:
            result = self.session.get(next_link)
        else:
            result = self.session.query(f"{group_path}/projects?{'&'.join(url_params)}")

        if not result:
            print(f"No projects in group {group_id} found or permissions not sufficient.")
            return

        self.projects += [{key: project[key] for key in kept_keys} for project in result.json()]

        next_link = self.parse_pagination(result.headers)
        if not next_link:
            return
|
||||
|
||||
def get_projects(self):
    """Fill ``self.projects`` with the projects of all groups in ``self.groups``.

    When ``self.groups`` is empty, the groups are looked up first through
    ``self.get_groups()``. Afterwards ``self.projects`` holds each project
    id at most once, in order of first appearance.
    """
    print("Getting GitLab Projects")
    # Only discover groups when none was configured up front.
    if not self.groups:
        self.get_groups()

    for group_id in self.groups:
        print(f"Getting Projects for Group {group_id}")
        self.get_projects_by_group(group_id=group_id)

    # Projects are requested per group with include_subgroups=true, so a
    # project inside a subgroup is collected for the subgroup and again for
    # every ancestor group that is also listed. Keep the first occurrence so
    # a repository is not processed more than once.
    seen_ids = set()
    unique_projects = []
    for project in self.projects:
        if project['id'] in seen_ids:
            continue
        seen_ids.add(project['id'])
        unique_projects.append(project)
    self.projects = unique_projects

    print(len(self.projects))
|
||||
|
||||
|
||||
def clone_repo(self, repo_url=None):
|
||||
repo_host_path = repo_url.split('://')[1]
|
||||
repo_http_scheme = repo_url.split('://')[0]
|
||||
repo_credentials = f"token:{self.session.pat}"
|
||||
repo_remote = f"{repo_http_scheme}://{repo_credentials}@{repo_host_path}"
|
||||
repo_name = repo_host_path.replace('/', '_').rstrip('.git')
|
||||
repo_path = f"{git_tmp_root}/{repo_name}"
|
||||
|
||||
if os.path.isdir(repo_path) and os.listdir(repo_path):
|
||||
return repo_path
|
||||
if os.path.isdir(repo_path) and os.listdir(repo_path):
|
||||
return repo_path
|
||||
|
||||
print(f"Processing Repository {repo_host_path}")
|
||||
try:
|
||||
repo = Repo.clone_from(repo_remote, repo_path)
|
||||
repo.close()
|
||||
except:
|
||||
print(f"Cant clone {repo_url}")
|
||||
print(f"Processing Repository {repo_host_path}")
|
||||
try:
|
||||
repo = Repo.clone_from(repo_remote, repo_path)
|
||||
repo.close()
|
||||
except:
|
||||
print(f"Cant clone {repo_url}")
|
||||
return repo_path
|
||||
return repo_path
|
||||
return repo_path
|
||||
|
||||
def parse_fileglob():
    """Build ripgrep ``--glob`` arguments from the file named by ``GLOB_FILE``.

    The environment variable ``GLOB_FILE`` (default ``globfile``) names a
    text file with one glob pattern or filename per line.

    Returns:
        A flat list of the form ``["--glob", pattern, "--glob", pattern, ...]``.
        The list is empty when the file does not exist; a notice is printed
        in that case.
    """
    glob_file = Path(os.environ.get('GLOB_FILE', 'globfile'))
    if not glob_file.is_file():
        print(f"Specified glob file {glob_file} not found. Searching all files.")
        return []

    ret_val = []
    with open(glob_file, 'r') as file:
        for line in file:
            pattern = line.rstrip()
            # A blank line would otherwise be passed on as an empty --glob
            # argument.
            if not pattern:
                continue
            ret_val.extend(("--glob", pattern))

    print(ret_val)
    return ret_val
|
||||
|
||||
def scan_repo(path=None, repo=None):
|
||||
|
||||
scan_result = None
|
||||
scan_result_lines = []
|
||||
scan_matches = []
|
||||
@@ -114,10 +186,10 @@ def scan_repo(path=None, repo=None):
|
||||
"--json",
|
||||
"-i",
|
||||
"-f",
|
||||
"patternfile",
|
||||
path
|
||||
pattern_file
|
||||
]
|
||||
|
||||
ripgrep_cmd = ripgrep_cmd + fileglob
|
||||
ripgrep_cmd.append(path)
|
||||
try:
|
||||
scan_result = subprocess.run(ripgrep_cmd, capture_output=True, text=True)
|
||||
except:
|
||||
@@ -125,7 +197,6 @@ def scan_repo(path=None, repo=None):
|
||||
return []
|
||||
|
||||
scan_out_lines = list(filter(None, scan_result.stdout.split('\n')))
|
||||
|
||||
for line in scan_out_lines:
|
||||
line_data = json.loads(line)
|
||||
if line_data.get("type") == "match":
|
||||
@@ -139,13 +210,15 @@ def scan_repo(path=None, repo=None):
|
||||
return scan_matches
|
||||
|
||||
def evaluate_findings(findings=[]):
|
||||
if not findings:
|
||||
return []
|
||||
|
||||
finding_results = []
|
||||
for finding in findings:
|
||||
filename = finding['full_path'].split('/')[-1]
|
||||
if filename.startswith("package"):
|
||||
print(f"Found potential match - {finding['path']} - {finding['matches'][0]['match']['text']}")
|
||||
detail = check_line_in_file(file=finding['full_path'], line_number=finding['line_number'])
|
||||
finding_results += [[finding['repo'], finding['path'], finding['line_number'], detail.lstrip(),finding['matches'][0]['match']['text']]]
|
||||
print(f"Found potential match - {finding['path']} - {finding['matches'][0]['match']['text']}")
|
||||
detail = check_line_in_file(file=finding['full_path'], line_number=finding['line_number'])
|
||||
finding_results += [[finding['repo'], finding['path'], finding['line_number'], detail.lstrip(),finding['matches'][0]['match']['text']]]
|
||||
return finding_results
|
||||
|
||||
def check_line_in_file(file=None, line_number=None):
|
||||
@@ -155,26 +228,31 @@ def check_line_in_file(file=None, line_number=None):
|
||||
print(line)
|
||||
return line.rstrip().replace(',', '')
|
||||
|
||||
|
||||
def check_repos():
|
||||
repos = get_all_projects(group_id=os.environ.get('GITLAB_GROUP'))
|
||||
print(f"Found {len(repos)} Repositories..")
|
||||
for repo in repos:
|
||||
scan_path = clone_repo_with_http(repo['http_url_to_repo'])
|
||||
gl = GitlabRepositories()
|
||||
gl.get_projects()
|
||||
print(f"Found {len(gl.projects)} Repositories..")
|
||||
|
||||
for repo in gl.projects:
|
||||
scan_path = gl.clone_repo(repo['http_url_to_repo'])
|
||||
findings = scan_repo(scan_path, repo['web_url'])
|
||||
if findings:
|
||||
print("Evaluating matches")
|
||||
finding_results = evaluate_findings(findings=findings)
|
||||
if finding_results:
|
||||
report.findings += finding_results
|
||||
print("Evaluating matches")
|
||||
finding_results = evaluate_findings(findings=findings)
|
||||
if finding_results:
|
||||
report.findings += finding_results
|
||||
subprocess.run(["rm", "-rf", scan_path])
|
||||
|
||||
git_tmp_root = os.environ.get('GIT_TMP_ROOT', '/tmp/hulud_check')
|
||||
report_path = os.environ.get('REPORT_PATH', '/tmp/hulud_check_reports')
|
||||
|
||||
git_tmp_root = os.environ.get('GIT_TMP_ROOT', '/tmp/repo_check')
|
||||
report_path = os.environ.get('REPORT_PATH', '/tmp/check_reports')
|
||||
report_file = os.environ.get('REPORT_FILE', 'report.csv')
|
||||
pattern_file = os.environ.get('PATTERN_FILE', 'patternfile')
|
||||
|
||||
fileglob = parse_fileglob()
|
||||
|
||||
Path(git_tmp_root).mkdir(parents=True, exist_ok=True)
|
||||
Path(report_path).mkdir(parents=True, exist_ok=True)
|
||||
session = GitlabConnector()
|
||||
report = Report()
|
||||
check_repos()
|
||||
report.results()
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/bash
|
||||
echo "Get most recent defintions"
|
||||
curl -s https://raw.githubusercontent.com/wiz-sec-public/wiz-research-iocs/refs/heads/main/reports/shai-hulud-2-packages.csv > sha1-hulud-2-packages.csv
|
||||
echo "Format patterns"
|
||||
tail -n +2 sha1-hulud-2-packages.csv | awk -F ',' '{print $1}' > patternfile
|
||||
echo "Running check"
|
||||
python3 -u check_gitlab.py
|
||||
2
patternfile
Normal file
2
patternfile
Normal file
@@ -0,0 +1,2 @@
|
||||
axios.*1\.4\.1
|
||||
axios.*0\.3\.0
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "hulud_check"
|
||||
description = "Quick hacky check for sha1-hulud"
|
||||
name = "gitlab_scanner"
|
||||
description = "Quick hacky Gitlab Repository scanner for searching patterns in files"
|
||||
version = "2025.0.0"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
|
||||
Reference in New Issue
Block a user