From c5b53c2d6cef1035ed9c1980e9b96259afe72792 Mon Sep 17 00:00:00 2001 From: Michael Sukkarieh Date: Thu, 19 Sep 2024 13:22:13 -0700 Subject: [PATCH] Sanitize posthog data and add more info in README (#4) * remove page view provider * sanitize current_url and ip properties in all posthog events * add posthog usage info in README * remove unnecessary ip property removal since we disable ip collection on posthog side * add back ip sanitization on client side (we disabled it on server side but may as well also clear it on client) and revise README on telemetry * add typo with asterisks in readme * small grammar fix in README --- README.md | 12 +++++++----- src/app/layout.tsx | 5 ----- src/app/posthogPageView.tsx | 28 ---------------------------- src/app/posthogProvider.tsx | 14 +++++++++++++- 4 files changed, 20 insertions(+), 39 deletions(-) delete mode 100644 src/app/posthogPageView.tsx diff --git a/README.md b/README.md index 4b644a67..39eadf40 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ Sourcebot also supports indexing GitLab & BitBucket. Checkout the [index.json](. zoekt will now index your repositories (at `HEAD`). By default, it will re-index existing repositories every hour, and discover new repositories every 24 hours. -4. Go to `http://localhost:3000` - once a index has been created, you can start searching. +4. Go to `http://localhost:3000` - once an index has been created, you can start searching. ## Building Sourcebot @@ -182,14 +182,16 @@ The zoekt binaries and web dependencies are placed into `bin` and `node_modules` A `.sourcebot` directory will be created and zoekt will begin to index the repositories found given `config.json`. -6. Go to `http://localhost:3000` - once a index has been created, you can start searching. 
-## Disabling Telemetry +## Telemetry -By default, Sourcebot collects anonymous usage data using [PostHog](https://posthog.com/). You can disable this by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `1` in the docker run command: +By default, Sourcebot collects anonymized usage data through [PostHog](https://posthog.com/) to help us improve the performance and reliability of our tool. We do not collect or transmit [any information related to your codebase](https://github.com/search?q=repo:TaqlaAI/sourcebot++captureEvent&type=code). All events are [sanitized](https://github.com/TaqlaAI/sourcebot/blob/main/src/app/posthogProvider.tsx) to ensure that no sensitive or identifying details leave your machine. The data we collect includes general usage statistics and metadata such as query performance (e.g., search duration, error rates) to monitor the application's health and functionality. This information helps us better understand how Sourcebot is used and where improvements can be made :) + +If you'd like to disable all telemetry, you can do so by setting the environment variable `SOURCEBOT_TELEMETRY_DISABLED` to `1` in the docker run command: ```sh -docker run -e SOURCEBOT_TELEMETRY_DISABLED=1 ...stuff... 
ghcr.io/taqlaai/sourcebot:main +docker run -e SOURCEBOT_TELEMETRY_DISABLED=1 /* additional args */ ghcr.io/taqlaai/sourcebot:main ``` Or if you are building locally, add the following to your [.env](./.env) file: diff --git a/src/app/layout.tsx b/src/app/layout.tsx index d698c6ec..c807ad82 100644 --- a/src/app/layout.tsx +++ b/src/app/layout.tsx @@ -9,10 +9,6 @@ import dynamic from "next/dynamic"; const inter = Inter({ subsets: ["latin"] }); -const PostHogPageView = dynamic(() => import('./posthogPageView'), { - ssr: false, - }) - export const metadata: Metadata = { title: "Sourcebot", description: "Sourcebot", @@ -31,7 +27,6 @@ export default function RootLayout({ > - { - // Track pageviews - if (pathname && posthog) { - let url = window.origin + pathname - if (searchParams.toString()) { - url = url + `?${searchParams.toString()}` - } - posthog.capture( - '$pageview', - { - '$current_url': url, - } - ) - } - }, [pathname, searchParams, posthog]) - - return null -} \ No newline at end of file diff --git a/src/app/posthogProvider.tsx b/src/app/posthogProvider.tsx index 22ffc745..8c146ae2 100644 --- a/src/app/posthogProvider.tsx +++ b/src/app/posthogProvider.tsx @@ -9,7 +9,19 @@ if (typeof window !== 'undefined') { api_host: "/ingest", ui_host: NEXT_PUBLIC_POSTHOG_UI_HOST, person_profiles: 'identified_only', - capture_pageview: false, // Disable automatic pageview capture, as we capture manually + capture_pageview: false, // Disable automatic pageview capture + autocapture: false, // Disable automatic event capture + sanitize_properties: (properties: Record, _event: string) => { + // https://posthog.com/docs/libraries/js#config + if (properties['$current_url']) { + properties['$current_url'] = null; + } + if (properties['$ip']) { + properties['$ip'] = null; + } + + return properties; + } }); } else { console.log("PostHog telemetry disabled");